Commit 8a33b2d

Merge branch 'rawqc' into develop
2 parents: b678717 + 00093a6

File tree: 11 files changed, +272 −91 lines


brainbox/io/one.py

Lines changed: 73 additions & 30 deletions
@@ -108,7 +108,8 @@ def _channels_alf2bunch(channels, brain_regions=None):
     return channels_


-def _load_spike_sorting(eid, one=None, collection=None, revision=None, return_channels=True, dataset_types=None):
+def _load_spike_sorting(eid, one=None, collection=None, revision=None, return_channels=True, dataset_types=None,
+                        brain_regions=None):
     """
     Generic function to load spike sorting according to ONE search words
     Will try to load one spike sorting for any probe present for the eid matching the collection
@@ -121,6 +122,7 @@ def _load_spike_sorting(eid, one=None, collection=None, revision=None, return_ch
     :param collection: collection filter word - accepts wildcard - can be a combination of spike sorter and probe
     :param revision: revision to load
     :param return_channels: True
+    :param brain_regions: ibllib.atlas.regions.BrainRegions object - will label acronyms if provided
     :return:
     """
     one = one or ONE()
@@ -140,7 +142,8 @@ def _load_spike_sorting(eid, one=None, collection=None, revision=None, return_ch
         clusters[pname] = one.load_object(eid, collection=probe_collection, obj='clusters',
                                           attribute=cluster_attributes)

-    channels = _load_channels_locations_from_disk(eid, collection=collection, one=one, revision=revision)
+    channels = _load_channels_locations_from_disk(eid, collection=collection, one=one, revision=revision,
+                                                  brain_regions=brain_regions)

    if return_channels:
        return spikes, clusters, channels
@@ -179,31 +182,42 @@ def _load_channels_locations_from_disk(eid, collection=None, one=None, revision=
             _logger.debug(f"looking for a resolved alignment dataset in {aligned_channel_collections}")
             ac_collection = _get_spike_sorting_collection(aligned_channel_collections, probe)
             channels_aligned = one.load_object(eid, 'channels', collection=ac_collection)
-            # oftentimes the channel map for different spike sorters may be different so interpolate the alignment onto
-            nch = channels[probe]['localCoordinates'].shape[0]
-            # if there is no spike sorting in the base folder, the alignment doesn't have the localCoordinates field
-            # so we reconstruct from the Neuropixel map. This only happens for early pykilosort sorts
-            if 'localCoordinates' in channels_aligned.keys():
-                aligned_depths = channels_aligned['localCoordinates'][:, 1]
-            else:
-                assert channels_aligned['mlapdv'].shape[0] == 384
-                NEUROPIXEL_VERSION = 1
-                from ibllib.ephys.neuropixel import trace_header
-                aligned_depths = trace_header(version=NEUROPIXEL_VERSION)['y']
-            depth_aligned, ind_aligned = np.unique(aligned_depths, return_index=True)
-            depths, ind, iinv = np.unique(channels[probe]['localCoordinates'][:, 1], return_index=True, return_inverse=True)
-            channels[probe]['mlapdv'] = np.zeros((nch, 3))
-            for i in np.arange(3):
-                channels[probe]['mlapdv'][:, i] = np.interp(
-                    depths, depth_aligned, channels_aligned['mlapdv'][ind_aligned, i])[iinv]
-            # the brain locations have to be interpolated by nearest neighbour
-            fcn_interp = interp1d(depth_aligned, channels_aligned['brainLocationIds_ccf_2017'][ind_aligned], kind='nearest')
-            channels[probe]['brainLocationIds_ccf_2017'] = fcn_interp(depths)[iinv].astype(np.int32)
+            channels[probe] = channel_locations_interpolation(channels_aligned, channels[probe])
         # only have to reformat channels if we were able to load coordinates from disk
         channels[probe] = _channels_alf2bunch(channels[probe], brain_regions=brain_regions)
     return channels


+def channel_locations_interpolation(channels_aligned, channels):
+    """
+    Oftentimes the channel map for different spike sorters may be different, so interpolate the alignment onto
+    the channel map at hand. If there is no spike sorting in the base folder, the alignment doesn't have the
+    localCoordinates field, so we reconstruct it from the Neuropixel map. This only happens for early pykilosort sorts.
+    :param channels_aligned: Bunch or dictionary of aligned channels containing at least keys
+     'mlapdv' and 'brainLocationIds_ccf_2017' - those are the guide for the interpolation
+    :param channels: Bunch or dictionary of aligned channels containing at least keys 'localCoordinates'
+    :return: Bunch or dictionary of channels with extra keys 'mlapdv' and 'brainLocationIds_ccf_2017'
+    """
+    nch = channels['localCoordinates'].shape[0]
+    if 'localCoordinates' in channels_aligned.keys():
+        aligned_depths = channels_aligned['localCoordinates'][:, 1]
+    else:
+        assert channels_aligned['mlapdv'].shape[0] == 384
+        NEUROPIXEL_VERSION = 1
+        from ibllib.ephys.neuropixel import trace_header
+        aligned_depths = trace_header(version=NEUROPIXEL_VERSION)['y']
+    depth_aligned, ind_aligned = np.unique(aligned_depths, return_index=True)
+    depths, ind, iinv = np.unique(channels['localCoordinates'][:, 1], return_index=True, return_inverse=True)
+    channels['mlapdv'] = np.zeros((nch, 3))
+    for i in np.arange(3):
+        channels['mlapdv'][:, i] = np.interp(
+            depths, depth_aligned, channels_aligned['mlapdv'][ind_aligned, i])[iinv]
+    # the brain locations have to be interpolated by nearest neighbour
+    fcn_interp = interp1d(depth_aligned, channels_aligned['brainLocationIds_ccf_2017'][ind_aligned], kind='nearest')
+    channels['brainLocationIds_ccf_2017'] = fcn_interp(depths)[iinv].astype(np.int32)
+    return channels
+
+
 def _load_channel_locations_traj(eid, probe=None, one=None, revision=None, aligned=False,
                                  brain_atlas=None):
     print('from traj')
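
Note: the extracted channel_locations_interpolation helper can now be exercised on its own. A minimal sketch, assuming the brainbox/ibllib environment; the 384-channel geometry and the random mlapdv/brainLocationIds values below are made up for illustration:

import numpy as np
from brainbox.io.one import channel_locations_interpolation

# hypothetical aligned channel map: 384 channels, 2 per row, 20 um depth pitch
depths = np.repeat(np.arange(192) * 20., 2)
channels_aligned = {
    'localCoordinates': np.c_[np.tile(np.array([16., 48.]), 192), depths],
    'mlapdv': np.random.uniform(-5e3, 5e3, (384, 3)),  # made-up coordinates
    'brainLocationIds_ccf_2017': np.random.randint(0, 2000, 384),  # made-up ids
}
# the sorter-specific channel map only needs 'localCoordinates'
channels = {'localCoordinates': channels_aligned['localCoordinates'].copy()}
channels = channel_locations_interpolation(channels_aligned, channels)
assert channels['mlapdv'].shape == (384, 3)
assert channels['brainLocationIds_ccf_2017'].dtype == np.int32
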
@@ -309,17 +323,43 @@ def load_channel_locations(eid, probe=None, one=None, aligned=False, brain_atlas
     return channels


-def load_spike_sorting_fast(eid, probe=None, spike_sorter=None, **kwargs):
+def load_spike_sorting_fast(eid, one=None, probe=None, dataset_types=None, spike_sorter=None, revision=None,
+                            brain_regions=None, nested=True):
     """
-    Same as load_spike_sorting but with return_channels=True
+    From an eid, loads spikes and clusters for all probes
+    The following set of dataset types are loaded:
+        'clusters.channels',
+        'clusters.depths',
+        'clusters.metrics',
+        'spikes.clusters',
+        'spikes.times',
+        'probes.description'
+    :param eid: experiment UUID or pathlib.Path of the local session
+    :param one: an instance of OneAlyx
+    :param probe: name of probe to load in, if not given all probes for session will be loaded
+    :param dataset_types: additional spikes/clusters objects to add to the standard default list
+    :param spike_sorter: name of the spike sorting you want to load (None for default)
+    :param brain_regions: ibllib.atlas.regions.BrainRegions object - will label acronyms if provided
+    :param nested: if a single probe is required, do not output a dictionary with the probe name as key
+    :return: spikes, clusters, channels (dict of bunch, 1 bunch per probe)
+    """
     collection = _collection_filter_from_args(probe, spike_sorter)
     _logger.debug(f"load spike sorting with collection filter {collection}")
-    return _load_spike_sorting(eid, collection=collection, return_channels=True, **kwargs)
+    kwargs = dict(eid=eid, one=one, collection=collection, revision=revision, dataset_types=dataset_types,
+                  brain_regions=brain_regions)
+    spikes, clusters, channels = _load_spike_sorting(**kwargs, return_channels=True)
+    clusters = merge_clusters_channels(clusters, channels, keys_to_add_extra=None)
+    if nested is False:
+        k = list(spikes.keys())[0]
+        channels = channels[k]
+        clusters = clusters[k]
+        spikes = spikes[k]
+    return spikes, clusters, channels


-def load_spike_sorting(eid, one=None, probe=None, dataset_types=None,
-                       spike_sorter=None, revision=None, return_channels=False):
+def load_spike_sorting(eid, one=None, probe=None, dataset_types=None, spike_sorter=None, revision=None,
+                       brain_regions=None):
     """
     From an eid, loads spikes and clusters for all probes
     The following set of dataset types are loaded:
@@ -335,12 +375,15 @@ def load_spike_sorting(eid, one=None, probe=None, dataset_types=None,
     :param dataset_types: additional spikes/clusters objects to add to the standard default list
     :param spike_sorter: name of the spike sorting you want to load (None for default)
     :param return_channels: (bool) defaults to False otherwise tries to load channels from disk
-    :return: spikes, clusters, channels (dict of bunch, 1 bunch per probe)
+    :param brain_regions: ibllib.atlas.regions.BrainRegions object - will label acronyms if provided
+    :return: spikes, clusters (dict of bunch, 1 bunch per probe)
     """
     collection = _collection_filter_from_args(probe, spike_sorter)
     _logger.debug(f"load spike sorting with collection filter {collection}")
-    return _load_spike_sorting(eid=eid, one=one, collection=collection, revision=revision,
-                               return_channels=return_channels, dataset_types=dataset_types)
+    spikes, clusters = _load_spike_sorting(eid=eid, one=one, collection=collection, revision=revision,
+                                           return_channels=False, dataset_types=dataset_types,
+                                           brain_regions=brain_regions)
+    return spikes, clusters


 def load_spike_sorting_with_channel(eid, one=None, probe=None, aligned=False, dataset_types=None,
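
Note: a sketch of how the reworked loaders might be called; the import paths and the placeholder eid are assumptions, not taken from this diff:

from one.api import ONE
from ibllib.atlas import BrainRegions
from brainbox.io.one import load_spike_sorting, load_spike_sorting_fast

one = ONE()
br = BrainRegions()
eid = '00000000-0000-0000-0000-000000000000'  # placeholder session UUID

# plain loader: spikes and clusters only, acronyms labelled via brain_regions
spikes, clusters = load_spike_sorting(eid, one=one, brain_regions=br)

# fast loader: also returns channels and merges channel info into clusters;
# nested=False unwraps the single-probe dictionaries
spikes, clusters, channels = load_spike_sorting_fast(
    eid, one=one, probe='probe00', brain_regions=br, nested=False)
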

ibllib/dsp/cadzow.py

Lines changed: 17 additions & 6 deletions
@@ -48,14 +48,25 @@ def trajectory(x, y):


 def denoise(WAV, x, y, r, imax=None, niter=1):
-    WAV_ = np.zeros_like(WAV)
+    """
+    Applies Cadzow denoising by de-ranking spatial matrices in the frequency domain
+    :param WAV: np array nc / ns in frequency domain
+    :param x:
+    :param y:
+    :param r:
+    :param imax:
+    :param niter:
+    :return:
+    """
+    WAV_ = np.copy(WAV)
     imax = np.minimum(WAV.shape[-1], imax) if imax else WAV.shape[-1]
     T, it, itr, trcount = trajectory(x, y)
     for ind_f in np.arange(imax):
-        T[it] = WAV[itr, ind_f]
-        T_ = derank(T, r)
-        WAV_[:, ind_f] = np.bincount(itr, weights=np.real(T_[it]))
-        WAV_[:, ind_f] += 1j * np.bincount(itr, weights=np.imag(T_[it]))
-        WAV_[:, ind_f] /= trcount
+        for _ in np.arange(niter):
+            T[it] = WAV_[itr, ind_f]
+            T_ = derank(T, r)
+            WAV_[:, ind_f] = np.bincount(itr, weights=np.real(T_[it]))
+            WAV_[:, ind_f] += 1j * np.bincount(itr, weights=np.imag(T_[it]))
+            WAV_[:, ind_f] /= trcount

     return WAV_
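
Note: the new inner loop de-ranks the trajectory matrix niter times per frequency bin. derank itself is not shown in this diff; a generic rank-r truncation via SVD, as a sketch of what such a step typically does (an illustrative stand-in, not ibllib's implementation):

import numpy as np

def derank_svd(T, r):
    # keep the r largest singular values of T and zero the rest;
    # ibllib.dsp.cadzow.derank may differ in detail
    U, S, Vh = np.linalg.svd(T, full_matrices=False)
    S[r:] = 0
    return (U * S) @ Vh
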

ibllib/dsp/voltage.py

Lines changed: 3 additions & 1 deletion
@@ -40,11 +40,13 @@ def reject_channels(x, fs, butt_kwargs=None, threshold=0.6, trx=1):
 def agc(x, wl=.5, si=.002, epsilon=1e-8):
     """
     Automatic gain control
+    w_agc, gain = agc(w, wl=.5, si=.002, epsilon=1e-8)
+    such that w_agc / gain = w
     :param x: seismic array (sample last dimension)
     :param wl: window length (secs)
     :param si: sampling interval (secs)
     :param epsilon: whitening (useful mainly for synthetic data)
-    :return:
+    :return: AGC data array, gain applied to data
     """
     ns_win = np.round(wl / si / 2) * 2 + 1
     w = np.hanning(ns_win)
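
Note: per the amended docstring, the returned gain lets the caller undo the normalisation. A quick sketch on synthetic data, assuming the ibllib.dsp.voltage import path:

import numpy as np
from ibllib.dsp.voltage import agc

si = .002  # 500 Hz sampling interval
w = np.random.randn(2, 500) * np.linspace(.1, 5, 500)  # amplitude ramps up 50x
w_agc, gain = agc(w, wl=.5, si=si)
# per the docstring, dividing the AGC output by the gain recovers the input
np.testing.assert_allclose(w_agc / gain, w, rtol=1e-6)
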

ibllib/ephys/ephysqc.py

Lines changed: 39 additions & 13 deletions
@@ -14,7 +14,7 @@

 from brainbox.metrics.single_units import spike_sorting_metrics
 from brainbox.io.spikeglx import stream as sglx_streamer
-from ibllib.ephys import sync_probes
+from ibllib.ephys import sync_probes, neuropixel, spikes
 from ibllib.io import spikeglx
 import ibllib.dsp as dsp
 from ibllib.qc import base
@@ -31,6 +31,7 @@
 BATCHES_SPACING = 300
 TMIN = 40
 SAMPLE_LENGTH = 1
+SPIKE_THRESHOLD_UV = -50  # negative, the threshold used for spike detection on pre-processed raw data


 class EphysQC(base.QC):
@@ -89,6 +90,23 @@ def load_data(self) -> None:
             bin_file = next(meta_file.parent.glob(f'*{dstype}.*bin'), None)
             self.data[f'{dstype}'] = spikeglx.Reader(bin_file, open=True) if bin_file is not None else None

+    @staticmethod
+    def _compute_metrics_array(raw, fs, h):
+        """
+        From a numpy array, computes rms on raw data, destripes, computes rms on destriped data
+        and performs a simple spike detection
+        :param raw: voltage numpy.array(ntraces, nsamples)
+        :param fs: sampling frequency (Hz)
+        :param h: dictionary containing sensor coordinates, see ibllib.ephys.neuropixel.trace_header
+        :return: 3 numpy vectors of nchannels length
+        """
+        destripe = dsp.destripe(raw, fs=fs, neuropixel_version=1)
+        rms_raw = dsp.rms(raw)
+        rms_pre_proc = dsp.rms(destripe)
+        detections = spikes.detection(data=destripe.T, fs=fs, h=h, detect_threshold=SPIKE_THRESHOLD_UV * 1e-6)
+        spike_rate = np.bincount(detections.trace, minlength=raw.shape[0]).astype(np.float32)
+        return rms_raw, rms_pre_proc, spike_rate
+
     def run(self, update: bool = False, overwrite: bool = True, stream: bool = None, **kwargs) -> (str, dict):
         """
         Run QC on samples of the .ap file, and on the entire file for .lf data if it is present.
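
Note: the staticmethod can be exercised without instantiating the QC object. A minimal sketch on synthetic data, assuming a Neuropixel 1.0 header; the 1e-5 scaling mimics voltage-scaled data:

import numpy as np
from ibllib.ephys import neuropixel
from ibllib.ephys.ephysqc import EphysQC

fs = 30000                               # AP band sampling rate (Hz)
h = neuropixel.trace_header(version=1)
raw = np.random.randn(384, fs) * 1e-5    # 1 s of synthetic voltage data
rms_raw, rms_pre_proc, spike_rate = EphysQC._compute_metrics_array(raw, fs, h)
assert rms_raw.shape == rms_pre_proc.shape == spike_rate.shape == (384,)
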
@@ -109,39 +127,47 @@ def run(self, update: bool = False, overwrite: bool = True, stream: bool = None,
         # TODO: This should go in a separate function once we have a spikeglx.Streamer that behaves like the Reader
         if self.data.ap_meta:
             rms_file = self.probe_path.joinpath("_iblqc_ephysChannels.apRMS.npy")
-            if rms_file.exists() and not overwrite:
+            spike_rate_file = self.probe_path.joinpath("_iblqc_ephysChannels.rawSpikeRates.npy")
+            if all([rms_file.exists(), spike_rate_file.exists()]) and not overwrite:
                 _logger.warning(f'RMS map already exists for .ap data in {self.probe_path}, skipping. '
                                 f'Use overwrite option.')
                 median_rms = np.load(rms_file)
             else:
                 rl = self.data.ap_meta.fileTimeSecs
-                nc = spikeglx._get_nchannels_from_meta(self.data.ap_meta)
+                nsync = len(spikeglx._get_sync_trace_indices_from_meta(self.data.ap_meta))
+                nc = spikeglx._get_nchannels_from_meta(self.data.ap_meta) - nsync
+                neuropixel_version = spikeglx._get_neuropixel_major_version_from_meta(self.data.ap_meta)
+                # verify that the channel layout is correct according to IBL layout
+                h = neuropixel.trace_header(neuropixel_version)
+                th = spikeglx._geometry_from_meta(self.data.ap_meta)
+                if not (np.all(h['x'] == th['x']) and np.all(h['y'] == th['y'])):
+                    _logger.critical("Channel geometry seems incorrect")
+                    raise ValueError("Wrong Neuropixel channel mapping used - ABORT")
                 t0s = np.arange(TMIN, rl - SAMPLE_LENGTH, BATCHES_SPACING)
-                all_rms = np.zeros((2, nc - 1, t0s.shape[0]))
+                all_rms = np.zeros((2, nc, t0s.shape[0]))
+                all_srs = np.zeros((nc, t0s.shape[0]))
                 # If the ap.bin file is not present locally, stream it
                 if self.data.ap is None and self.stream is True:
                     _logger.warning(f'Streaming .ap data to compute RMS samples for probe {self.pid}')
                     for i, t0 in enumerate(tqdm(t0s)):
                         sr, _ = sglx_streamer(self.pid, t0=t0, nsecs=1, one=self.one, remove_cached=True)
-                        raw = sr[:, :-1].T
-                        destripe = dsp.destripe(raw, fs=sr.fs, neuropixel_version=1)
-                        all_rms[0, :, i] = dsp.rms(raw)
-                        all_rms[1, :, i] = dsp.rms(destripe)
+                        raw = sr[:, :-nsync].T
+                        all_rms[0, :, i], all_rms[1, :, i], all_srs[:, i] = self._compute_metrics_array(raw, sr.fs, h)
                 elif self.data.ap is None and self.stream is not True:
                     _logger.warning('Raw .ap data is not available locally. Run with stream=True in order to stream '
                                     'data for calculating RMS samples.')
                 else:
                     _logger.info(f'Computing RMS samples for .ap data using local data in {self.probe_path}')
                     for i, t0 in enumerate(t0s):
                         sl = slice(int(t0 * self.data.ap.fs), int((t0 + SAMPLE_LENGTH) * self.data.ap.fs))
-                        raw = self.data.ap[sl, :-1].T
-                        destripe = dsp.destripe(raw, fs=self.data.ap.fs, neuropixel_version=1)
-                        all_rms[0, :, i] = dsp.rms(raw)
-                        all_rms[1, :, i] = dsp.rms(destripe)
+                        raw = self.data.ap[sl, :-nsync].T
+                        all_rms[0, :, i], all_rms[1, :, i], all_srs[:, i] = self._compute_metrics_array(raw, self.data.ap.fs, h)
                 # Calculate the median RMS across all samples per channel
                 median_rms = np.median(all_rms, axis=-1)
+                median_spike_rate = np.median(all_srs, axis=-1)
                 np.save(rms_file, median_rms)
-                qc_files.append(rms_file)
+                np.save(spike_rate_file, median_spike_rate)
+                qc_files.extend([rms_file, spike_rate_file])

             for p in [10, 90]:
                 self.metrics[f'apRms_p{p}_raw'] = np.format_float_scientific(np.percentile(median_rms[0, :], p),
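
Note: the hunk cuts off mid-call, so the rest of the metrics computation is not shown. A sketch of what the percentile summary plausibly looks like; the precision argument and the synthetic median_rms values are assumptions:

import numpy as np

median_rms = np.abs(np.random.randn(2, 384)) * 1e-5  # row 0: raw, row 1: destriped
metrics = {}
for p in [10, 90]:
    # summarise the per-channel median RMS by its 10th and 90th percentiles
    metrics[f'apRms_p{p}_raw'] = np.format_float_scientific(
        np.percentile(median_rms[0, :], p), precision=2)
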
