"""
PhyRawIO is a class to handle Phy spike sorting data.
Ported from:
https://github.com/SpikeInterface/spikeextractors/blob/
f20b1219eba9d3330d5d7cd7ce8d8924a255b8c2/spikeextractors/
extractors/phyextractors/phyextractors.py

Author: Regimantas Jurkus
"""

from .baserawio import (BaseRawIO, _signal_channel_dtype, _unit_channel_dtype,
                        _event_channel_dtype)

import numpy as np
from pathlib import Path
import re
import csv
import ast


class PhyRawIO(BaseRawIO):
    """
    Class for reading Phy data.

    Usage:
        >>> import neo.rawio
        >>> r = neo.rawio.PhyRawIO(dirname='/dir/to/phy/folder')
        >>> r.parse_header()
        >>> print(r)
        >>> spike_timestamp = r.get_spike_timestamps(block_index=0,
        ...     seg_index=0, unit_index=0, t_start=None, t_stop=None)
        >>> spike_times = r.rescale_spike_timestamp(spike_timestamp, 'float64')

    """
    extensions = []
    rawmode = 'one-dir'

    def __init__(self, dirname=''):
        BaseRawIO.__init__(self)
        self.dirname = dirname

    def _source_name(self):
        return self.dirname

    def _parse_header(self):
        phy_folder = Path(self.dirname)

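        # spike_times.npy holds one sample index per spike (frames, not
        # seconds); spike_templates.npy maps each spike to the template that
        # detected it. spike_clusters.npy is written once the sorting has
        # been curated in Phy and supersedes the template assignment.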
        self._spike_times = np.load(phy_folder / 'spike_times.npy')
        self._spike_templates = np.load(phy_folder / 'spike_templates.npy')

        if (phy_folder / 'spike_clusters.npy').is_file():
            self._spike_clusters = np.load(phy_folder / 'spike_clusters.npy')
        else:
            self._spike_clusters = self._spike_templates

        # TODO: Add this when array_annotations are ready
        # if (phy_folder / 'amplitudes.npy').is_file():
        #     amplitudes = np.squeeze(np.load(phy_folder / 'amplitudes.npy'))
        # else:
        #     amplitudes = np.ones(len(spike_times))
        #
        # if (phy_folder / 'pc_features.npy').is_file():
        #     pc_features = np.squeeze(np.load(phy_folder / 'pc_features.npy'))
        # else:
        #     pc_features = None

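        # params.py is a short file of `key = value` lines; an illustrative
        # example (field names as written by Kilosort/Phy, values made up):
        #     dat_path = 'recording.dat'
        #     n_channels_dat = 32
        #     dtype = 'int16'
        #     offset = 0
        #     sample_rate = 30000.
        #     hp_filtered = False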
        # SEE: https://stackoverflow.com/questions/4388626/
        # python-safe-eval-string-to-bool-int-float-none-string
        metadata = dict()
        if (phy_folder / 'params.py').is_file():
            with (phy_folder / 'params.py').open('r') as f:
                contents = f.read()
            contents = contents.replace('\n', ' ')
            pattern = re.compile(r'(\S*)[\s]?=[\s]?(\S*)')
            elements = pattern.findall(contents)
            for key, value in elements:
                metadata[key.lower()] = ast.literal_eval(value)

        # params.py must provide the sampling rate; a missing or incomplete
        # file raises a clear KeyError here
        self._sampling_frequency = metadata['sample_rate']

        clust_ids = np.unique(self._spike_clusters)
        self.unit_labels = list(clust_ids)

        self._t_start = 0.
        self._t_stop = (self._spike_times.max().item()
                        / self._sampling_frequency)

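        # Only spike sorting results are read by this IO, so the signal and
        # event channel lists below stay empty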
        sig_channels = []
        sig_channels = np.array(sig_channels, dtype=_signal_channel_dtype)

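        # No waveform data is read from the Phy folder, so the
        # waveform-related fields are zero/empty placeholders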
        unit_channels = []
        for clust_id in clust_ids:
            unit_name = f'unit {clust_id}'
            unit_id = f'{clust_id}'
            wf_units = ''
            wf_gain = 0
            wf_offset = 0.
            wf_left_sweep = 0
            wf_sampling_rate = 0
            unit_channels.append((unit_name, unit_id, wf_units, wf_gain,
                                  wf_offset, wf_left_sweep, wf_sampling_rate))
        unit_channels = np.array(unit_channels, dtype=_unit_channel_dtype)

        event_channels = []
        event_channels = np.array(event_channels, dtype=_event_channel_dtype)

        self.header = {}
        self.header['nb_block'] = 1
        self.header['nb_segment'] = [1]
        self.header['signal_channels'] = sig_channels
        self.header['unit_channels'] = unit_channels
        self.header['event_channels'] = event_channels

        self._generate_minimal_annotations()

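        # Curation results live in two-column .csv/.tsv files with a header
        # row and one row per cluster, e.g. cluster_group.tsv:
        #     cluster_id    group
        #     0             good
        #     1             mua
        # (Kilosort writes an analogous cluster_KSLabel.tsv.)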
        csv_tsv_files = [x for x in phy_folder.iterdir()
                         if x.suffix in ('.csv', '.tsv')]

        # annotation_lists is a list of lists of dicts
        # (csv.DictReader returns OrderedDict before python 3.8)
        # SEE: https://docs.python.org/3/library/csv.html#csv.DictReader
        annotation_lists = [self._parse_tsv_or_csv_to_list_of_dict(file)
                            for file in csv_tsv_files]

        bl_ann = self.raw_annotations['blocks'][0]
        bl_ann['name'] = "Block #0"
        seg_ann = bl_ann['segments'][0]
        seg_ann['name'] = 'Seg #0 Block #0'
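        # Each .csv/.tsv file is expected to have exactly two columns: the
        # cluster id and a single property (e.g. 'group' or 'KSLabel').
        # The matching cluster's value becomes a spiketrain annotation,
        # with 'KSLabel' renamed to 'quality'.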
        for index, clust_id in enumerate(clust_ids):
            spiketrain_an = seg_ann['units'][index]

            # Loop over the list of lists of dicts and annotate each
            # spiketrain
            for annotation_list in annotation_lists:
                clust_key, property_name = tuple(annotation_list[0].keys())
                if property_name == 'KSLabel':
                    annotation_name = 'quality'
                else:
                    annotation_name = property_name.lower()
                for annotation_dict in annotation_list:
                    if int(annotation_dict[clust_key]) == clust_id:
                        spiketrain_an[annotation_name] = \
                            annotation_dict[property_name]
                        break

    def _segment_t_start(self, block_index, seg_index):
        assert block_index == 0
        return self._t_start

    def _segment_t_stop(self, block_index, seg_index):
        assert block_index == 0
        return self._t_stop

    def _get_signal_size(self, block_index, seg_index, channel_indexes=None):
        return None

    def _get_signal_t_start(self, block_index, seg_index, channel_indexes):
        return None

    def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop,
                                channel_indexes):
        return None

    def _spike_count(self, block_index, seg_index, unit_index):
        assert block_index == 0
        spikes = self._spike_clusters
        unit_label = self.unit_labels[unit_index]
        mask = spikes == unit_label
        nb_spikes = np.sum(mask)
        return nb_spikes

    def _get_spike_timestamps(self, block_index, seg_index, unit_index,
                              t_start, t_stop):
        assert block_index == 0
        assert seg_index == 0

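        # spike times are stored as sample indices, while t_start/t_stop are
        # in seconds, so the bounds are converted to frames before masking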
        unit_label = self.unit_labels[unit_index]
        mask = self._spike_clusters == unit_label
        spike_timestamps = self._spike_times[mask]

        if t_start is not None:
            start_frame = int(t_start * self._sampling_frequency)
            spike_timestamps = \
                spike_timestamps[spike_timestamps >= start_frame]
        if t_stop is not None:
            end_frame = int(t_stop * self._sampling_frequency)
            spike_timestamps = spike_timestamps[spike_timestamps < end_frame]

        return spike_timestamps

    def _rescale_spike_timestamp(self, spike_timestamps, dtype):
        spike_times = spike_timestamps.astype(dtype)
        spike_times /= self._sampling_frequency
        return spike_times

    def _get_spike_raw_waveforms(self, block_index, seg_index, unit_index,
                                 t_start, t_stop):
        return None

    def _event_count(self, block_index, seg_index, event_channel_index):
        return None

    def _get_event_timestamps(self, block_index, seg_index,
                              event_channel_index, t_start, t_stop):
        return None

    def _rescale_event_timestamp(self, event_timestamps, dtype):
        return None

    def _rescale_epoch_duration(self, raw_duration, dtype):
        return None

    @staticmethod
    def _parse_tsv_or_csv_to_list_of_dict(filename):
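        # Returns one dict per data row keyed by the header row, e.g.
        # [{'cluster_id': '0', 'group': 'good'}, ...]; csv.DictReader
        # yields every field as a string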
        list_of_dict = list()
        with open(filename) as csvfile:
            if filename.suffix == '.csv':
                reader = csv.DictReader(csvfile, delimiter=',')
            elif filename.suffix == '.tsv':
                reader = csv.DictReader(csvfile, delimiter='\t')
            else:
                raise ValueError("Function parses only .csv or .tsv files")
            for row in reader:
                list_of_dict.append(row)

        return list_of_dict