
Commit 03fc88f

Merge pull request #403 from int-brain-lab/multiparts
lock in GPU tasks - local data handler
2 parents 95a0553 + 30fd082

9 files changed (+239, -27 lines)

brainbox/ephys_plots.py

Lines changed: 99 additions & 1 deletion
@@ -1,9 +1,10 @@
 import numpy as np
 from matplotlib import cm
-
+import matplotlib.pyplot as plt
 from brainbox.plot_base import (ImagePlot, ScatterPlot, ProbePlot, LinePlot, plot_line,
                                 plot_image, plot_probe, plot_scatter, arrange_channels2banks)
 from brainbox.processing import bincount2D, compute_cluster_average
+from ibllib.atlas.regions import BrainRegions


 def image_lfp_spectrum_plot(lfp_power, lfp_freq, chn_coords, chn_inds, freq_range=(0, 300),
@@ -372,3 +373,100 @@ def line_amp_plot(spike_amps, spike_depths, spike_times, chn_coords, d_bin=10, d
         fig, ax = plot_line(data.convert2dict())
         return data.convert2dict(), fig, ax
     return data
+
+
+def plot_brain_regions(channel_ids, channel_depths=None, brain_regions=None, display=True, ax=None):
+    """
+    Plot brain regions along probe; if channel_depths is provided, plots along depth, otherwise along channel index
+    :param channel_ids: atlas ids for each channel
+    :param channel_depths: depth along probe for each channel
+    :param brain_regions: BrainRegions object
+    :param display: whether to output plot
+    :param ax: axis to plot on
+    :return:
+    """
+
+    if channel_depths is not None:
+        assert channel_ids.shape[0] == channel_depths.shape[0]
+
+    br = brain_regions or BrainRegions()
+
+    region_info = br.get(channel_ids)
+    boundaries = np.where(np.diff(region_info.id) != 0)[0]
+    boundaries = np.r_[0, boundaries, region_info.id.shape[0] - 1]
+
+    regions = np.c_[boundaries[0:-1], boundaries[1:]]
+    if channel_depths is not None:
+        regions = channel_depths[regions]
+    region_labels = np.c_[np.mean(regions, axis=1), region_info.acronym[boundaries[1:]]]
+    region_colours = region_info.rgb[boundaries[1:]]
+
+    if display:
+        if ax is None:
+            fig, ax = plt.subplots()
+
+        for reg, col in zip(regions, region_colours):
+            height = np.abs(reg[1] - reg[0])
+            color = col / 255
+            ax.bar(x=0.5, height=height, width=1, color=color, bottom=reg[0], edgecolor='w')
+        ax.set_yticks(region_labels[:, 0].astype(int))
+        ax.yaxis.set_tick_params(labelsize=8)
+        ax.get_xaxis().set_visible(False)
+        ax.set_yticklabels(region_labels[:, 1])
+        ax.spines['right'].set_visible(False)
+        ax.spines['top'].set_visible(False)
+        ax.spines['bottom'].set_visible(False)
+
+        return fig, ax
+    else:
+        return regions, region_labels, region_colours
+
+
+def plot_cdf(spike_amps, spike_depths, spike_times, n_amp_bins=10, d_bin=40, amp_range=None, d_range=None,
+             display=False, cmap='hot'):
+    """
+    Plot cumulative amplitude of spikes across depth
+    :param spike_amps: spike amplitudes
+    :param spike_depths: depth along probe of each spike (um)
+    :param spike_times: spike times (s)
+    :param n_amp_bins: number of amplitude bins to use
+    :param d_bin: the value of the depth bins in um (default is 40 um)
+    :param amp_range: amp range to use [amp_min, amp_max]; if not given, automatically computed from spike_amps
+    :param d_range: depth range to use, by default [0, 3840]
+    :param display: whether or not to display plot
+    :param cmap: colormap to use (default 'hot')
+    :return:
+    """
+
+    amp_range = amp_range or np.quantile(spike_amps, (0, 0.9))
+    amp_bins = np.linspace(amp_range[0], amp_range[1], n_amp_bins)
+    d_range = d_range or [0, 3840]
+    depth_bins = np.arange(d_range[0], d_range[1] + d_bin, d_bin)
+    t_bin = np.max(spike_times)
+
+    def histc(x, bins):
+        map_to_bins = np.digitize(x, bins)  # Get indices of the bins to which each value in input array belongs
+        res = np.zeros(bins.shape)
+
+        for el in map_to_bins:
+            res[el - 1] += 1  # Increment appropriate bin
+        return res
+
+    cdfs = np.empty((len(depth_bins) - 1, n_amp_bins))
+    for d in range(len(depth_bins) - 1):
+        spikes = np.bitwise_and(spike_depths > depth_bins[d], spike_depths <= depth_bins[d + 1])
+        h = histc(spike_amps[spikes], amp_bins) / t_bin
+        hcsum = np.cumsum(h[::-1])
+        cdfs[d, :] = hcsum[::-1]
+
+    cdfs[cdfs == 0] = np.nan
+
+    data = ImagePlot(cdfs.T, x=amp_bins * 1e6, y=depth_bins[:-1], cmap=cmap)
+    data.set_labels(title='Cumulative Amplitude', xlabel='Spike amplitude (uV)',
+                    ylabel='Distance from probe tip (um)', clabel='Firing Rate (Hz)')
+
+    if display:
+        fig, ax = plot_image(data.convert2dict(), fig_kwargs={'figsize': [3, 7]})
+        return data.convert2dict(), fig, ax
+
+    return data
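A minimal usage sketch for the two new functions (the synthetic arrays below are illustrative, not part of the commit; plot_brain_regions instantiates a BrainRegions() from the Allen structure tree when none is passed):

import numpy as np
from brainbox.ephys_plots import plot_brain_regions, plot_cdf

# two channels in CA3 (463) and two in PO (685), with depths along the probe in um
channel_ids = np.array([463, 463, 685, 685])
channel_depths = np.array([20., 40., 60., 80.])
fig, ax = plot_brain_regions(channel_ids, channel_depths=channel_depths)

# synthetic spikes: plot_cdf bins amplitudes per depth bin and cumulates from the largest amplitude down
rng = np.random.default_rng(0)
spike_times = np.sort(rng.uniform(0, 100, 5000))
spike_depths = rng.uniform(0, 3840, 5000)
spike_amps = rng.gamma(2., 5e-5, 5000)
data, fig, ax = plot_cdf(spike_amps, spike_depths, spike_times, display=True)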

brainbox/io/spikeglx.py

Lines changed: 3 additions & 3 deletions
@@ -128,9 +128,9 @@ def stream(pid, t0, nsecs=1, one=None, cache_folder=None, remove_cached=False, t
     samples_folder = Path(one.alyx._par.CACHE_DIR).joinpath('cache', typ)

     eid, pname = one.pid2eid(pid)
-    cbin_rec = one.list_datasets(eid, collection=f"*{pname}", filename='*ap.*bin', details=True)
-    ch_rec = one.list_datasets(eid, collection=f"*{pname}", filename='*ap.ch', details=True)
-    meta_rec = one.list_datasets(eid, collection=f"*{pname}", filename='*ap.meta', details=True)
+    cbin_rec = one.list_datasets(eid, collection=f"*{pname}", filename=f'*{typ}.*bin', details=True)
+    ch_rec = one.list_datasets(eid, collection=f"*{pname}", filename=f'*{typ}.ch', details=True)
+    meta_rec = one.list_datasets(eid, collection=f"*{pname}", filename=f'*{typ}.meta', details=True)
     ch_file = one._download_datasets(ch_rec)[0]
     one._download_datasets(meta_rec)[0]

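With the dataset filenames now parameterised on typ, the same helper can stream either band. A hedged sketch (the pid is a placeholder, and the keyword truncated in the signature above is assumed to be typ, as used in the function body):

from one.api import ONE
from brainbox.io.spikeglx import stream

one = ONE()
pid = 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx'  # placeholder probe insertion id
# previously the filename filter was hard-coded to '*ap.*'; with typ='lf' the
# lf.cbin / lf.ch / lf.meta datasets are resolved instead
sr = stream(pid, 100, nsecs=1, one=one, typ='lf')  # return value (a spikeglx-style reader) not shown in this diff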

brainbox/task/passive.py

Lines changed: 2 additions & 2 deletions
@@ -206,8 +206,8 @@ def get_stim_aligned_activity(stim_events, spike_times, spike_depths, z_score_fl
     base_intervals = np.c_[stim_times - base_stim, stim_times - pre_stim]
     out_intervals = stim_intervals[:, 1] > times[-1]

-    idx_stim = np.searchsorted(times, stim_intervals)[np.invert(out_intervals)]
-    idx_base = np.searchsorted(times, base_intervals)[np.invert(out_intervals)]
+    idx_stim = np.searchsorted(times, stim_intervals, side='right')[np.invert(out_intervals)]
+    idx_base = np.searchsorted(times, base_intervals, side='right')[np.invert(out_intervals)]

     stim_trials = np.zeros((depths.shape[0], n_bins, idx_stim.shape[0]))
     noise_trials = np.zeros((depths.shape[0], n_bins_base, idx_stim.shape[0]))
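The side='right' change only matters when an interval edge falls exactly on a bin time: with the default side='left' the edge resolves to the matching bin, with side='right' to the one after it. A minimal illustration:

import numpy as np

times = np.array([0., 1., 2., 3.])
np.searchsorted(times, 2.0)                # -> 2: inserts before the matching bin time
np.searchsorted(times, 2.0, side='right')  # -> 3: inserts after it, so an interval edge
                                           #    at exactly 2.0 is counted in the next bin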

ibllib/atlas/regions.py

Lines changed: 11 additions & 0 deletions
@@ -149,6 +149,17 @@ def _mapping_from_regions_list(self, new_map, lateralize=False):
         mapind = mapind[iregion]
         return mapind

+    def remap(self, region_ids, source_map='Allen', target_map='Beryl'):
+        """
+        Remap atlas region ids from a source map to a target map
+        :param region_ids: atlas ids to map
+        :param source_map: map name the original region_ids are in
+        :param target_map: map name to remap onto
+        :return:
+        """
+        _, inds = ismember(region_ids, self.id[self.mappings[source_map]])
+        return self.id[self.mappings[target_map][inds]]
+

 def regions_from_allen_csv():
     """

ibllib/oneibl/data_handlers.py

Lines changed: 15 additions & 4 deletions
@@ -7,7 +7,6 @@
 import abc
 from time import time

-from one.api import ONE
 from one.util import filter_datasets
 from one.alf.files import add_uuid_string
 from iblutil.io.parquet import np2str
@@ -27,10 +26,10 @@ def __init__(self, session_path, signature, one=None):
         :param one: ONE instance
         """
         self.session_path = session_path
-        self.one = one or ONE()
         self.signature = signature
+        self.one = one

-    def setup(self):
+    def setUp(self):
         """
         Function to optionally overload to download required data to run task
         :return:
@@ -42,7 +41,8 @@ def getData(self):
         Finds the datasets required for task based on input signatures
         :return:
         """
-
+        if self.one is None:
+            return
         session_datasets = self.one.list_datasets(self.one.path2eid(self.session_path), details=True)
         df = pd.DataFrame(columns=self.one._cache.datasets.columns)
         for file in self.signature['input_files']:
@@ -72,6 +72,17 @@ def cleanUp(self):
         pass


+class LocalDataHandler(DataHandler):
+    def __init__(self, session_path, signatures, one=None):
+        """
+        Data handler for running tasks locally, with no remote architecture or database connection
+        :param session_path: path to session
+        :param signatures: input and output file signatures
+        :param one: ONE instance
+        """
+        super().__init__(session_path, signatures, one=one)
+
+
 class ServerDataHandler(DataHandler):
     def __init__(self, session_path, signatures, one=None):
         """

ibllib/pipes/tasks.py

Lines changed: 67 additions & 17 deletions
@@ -6,29 +6,30 @@
 import time
 from _collections import OrderedDict
 import traceback
+import json

 from graphviz import Digraph

 from ibllib.misc import version
-import one.params
 from ibllib.oneibl import data_handlers
-
+import one.params
+from one.api import ONE

 _logger = logging.getLogger('ibllib')


 class Task(abc.ABC):
-    log = ""
-    cpu = 1
-    gpu = 0
+    log = ""  # placeholder to keep the log of the task for registration
+    cpu = 1  # CPU resource
+    gpu = 0  # GPU resources: as of now, either 0 or 1
     io_charge = 5  # integer percentage
     priority = 30  # integer percentage, 100 means highest priority
     ram = 4  # RAM needed to run (GB)
     one = None  # one instance (optional)
-    level = 0
-    outputs = None
+    level = 0  # level in the pipeline hierarchy: level 0 means there is no parent task
+    outputs = None  # placeholder for a list of Paths containing output files
     time_elapsed_secs = None
-    time_out_secs = None
+    time_out_secs = 3600 * 2  # time-out after which a task is considered dead
     version = version.ibllib()
     signature = {'input_files': [], 'output_files': []}  # list of tuples (filename, collection, required_flag)
     force = False  # whether or not to re-download missing input files on local server if not present
@@ -69,6 +70,11 @@ def run(self, **kwargs):
         wraps the _run() method with
         - error management
         - logging to variable
+        - writing a lock file if the GPU is used
+        - labelling the status property of the object. The status value is set to:
+             0: Complete
+            -1: Errored
+            -2: Didn't run as a lock was encountered
         """
         # if taskid or one properties are not available, local run only without alyx
         use_alyx = self.one is not None and self.taskid is not None
@@ -91,17 +97,20 @@ def run(self, **kwargs):
         # setup
         setup = self.setUp(**kwargs)
         _logger.info(f"Setup value is: {setup}")
+        self.status = 0
         if not setup:
             # case where outputs are present but don't have input files locally to rerun task
             # label task as complete
-            self.status = 0
             _, self.outputs = self.assert_expected_outputs()
-
         else:
             # run task
-            self.status = 0
             start_time = time.time()
             try:
+                if self.gpu >= 1:
+                    if not self._creates_lock():
+                        self.status = -2
+                        _logger.info(f"Job {self.__class__} exited as a lock was found")
+                        return
                 self.outputs = self._run(**kwargs)
                 _logger.info(f"Job {self.__class__} complete")
             except BaseException:
@@ -169,7 +178,6 @@ def setUp(self, **kwargs):
         :param kwargs:
         :return:
         """
-
         if self.location == 'server':
             self.get_signatures(**kwargs)

@@ -196,7 +204,6 @@ def setUp(self, **kwargs):
             # TODO in future should raise error if even after downloading don't have the correct files
             self.assert_expected_inputs(raise_error=False)
             return True
-
         else:
             self.data_handler = self.get_data_handler()
             self.data_handler.setUp()
@@ -206,9 +213,10 @@

     def tearDown(self):
         """
-        Function to optionally overload to check results
+        Function run after _run(); releases the GPU lock file if one was created
         """
-        pass
+        if self.gpu >= 1:
+            self._lock_file_path().unlink()

     def cleanUp(self):
         """
@@ -270,7 +278,9 @@ def get_data_handler(self, location=None):
         :return:
         """
         location = location or self.location
-
+        if location == 'local':
+            return data_handlers.LocalDataHandler(self.session_path, self.signature, one=self.one)
+        self.one = self.one or ONE()
         if location == 'server':
             dhandler = data_handlers.ServerDataHandler(self.session_path, self.signature, one=self.one)
         elif location == 'serverglobus':
@@ -281,9 +291,49 @@ def get_data_handler(self, location=None):
             dhandler = data_handlers.RemoteAwsDataHandler(self.session_path, self.signature, one=self.one)
         elif location == 'SDSC':
             dhandler = data_handlers.SDSCDataHandler(self, self.session_path, self.signature, one=self.one)
-
         return dhandler

+    @staticmethod
+    def make_lock_file(taskname="", time_out_secs=7200):
+        """Creates a GPU lock file with the given time-out in seconds"""
+        d = {'start': time.time(), 'name': taskname, 'time_out_secs': time_out_secs}
+        with open(Task._lock_file_path(), 'w+') as fid:
+            json.dump(d, fid)
+        return d
+
+    @staticmethod
+    def _lock_file_path():
+        """The lock file lives in ~/.one/gpu.lock"""
+        folder = Path.home().joinpath('.one')
+        folder.mkdir(exist_ok=True)
+        return folder.joinpath('gpu.lock')
+
+    def _make_lock_file(self):
+        """Creates a lock file with the current time"""
+        return Task.make_lock_file(self.name, self.time_out_secs)
+
+    def is_locked(self):
+        """Checks if there is a lock file for this given task"""
+        lock_file = self._lock_file_path()
+        if not lock_file.exists():
+            return False
+
+        with open(lock_file) as fid:
+            d = json.load(fid)
+        now = time.time()
+        if (now - d['start']) > d['time_out_secs']:
+            lock_file.unlink()
+            return False
+        else:
+            return True
+
+    def _creates_lock(self):
+        if self.is_locked():
+            return False
+        else:
+            self._make_lock_file()
+            return True
+

 class Pipeline(abc.ABC):
     """

ibllib/tests/test_atlas.py

Lines changed: 7 additions & 0 deletions
@@ -57,6 +57,13 @@ def test_mappings_not_lateralized(self):
         inds_[0] = 0
         assert np.all(inds == inds_)

+    def test_remap(self):
+        # Test mapping atlas ids from one map to another
+        atlas_id = np.array([463, 685])  # CA3 and PO
+        cosmos_atlas_id = self.brs.remap(atlas_id, source_map='Allen', target_map='Cosmos')
+        expected_cosmos_id = [1089, 549]  # HPF and TH
+        assert np.all(cosmos_atlas_id == expected_cosmos_id)
+

 class TestAtlasSlicesConversion(unittest.TestCase):

