Merge branch 'master' into fix-bad-brainvision-fileref

zm711 · web-flow · commit a7b819f98307 · 2025-09-26T20:50:25.000-04:00
diff --git a/neo/rawio/blackrockrawio.py b/neo/rawio/blackrockrawio.py
@@ -331,17 +331,16 @@ def _parse_header(self):
             # read nsx headers
             nsx_header_reader = self._nsx_header_reader[spec_version]
             self._nsx_basic_header[nsx_nb], self._nsx_ext_header[nsx_nb] = nsx_header_reader(nsx_nb)
-            
+
             # The Blackrock defines period as the number of  1/30_000 seconds between data points
             # E.g. it is 1 for 30_000, 3 for 10_000, etc
             nsx_period = self._nsx_basic_header[nsx_nb]["period"]
             sampling_rate = 30_000.0 / nsx_period
             self._nsx_sampling_frequency[nsx_nb] = float(sampling_rate)
 
-        
         # Parase data packages
         for nsx_nb in self._avail_nsx:
-        
+
             # The only way to know if it is the Precision Time Protocol of file spec 3.0
             # is to check for nanosecond timestamp resolution.
             is_ptp_variant = (
@@ -399,7 +398,9 @@ def _parse_header(self):
 
         self.nsx_datas = {}
         # Keep public attribute for backward compatibility but let's use the private one and maybe deprecate this at some point
-        self.sig_sampling_rates = {nsx_number: self._nsx_sampling_frequency[nsx_number] for nsx_number in self.nsx_to_load}
+        self.sig_sampling_rates = {
+            nsx_number: self._nsx_sampling_frequency[nsx_number] for nsx_number in self.nsx_to_load
+        }
         if len(self.nsx_to_load) > 0:
             for nsx_nb in self.nsx_to_load:
                 basic_header = self._nsx_basic_header[nsx_nb]
@@ -1072,7 +1073,6 @@ def _read_nsx_dataheader_spec_v30_ptp(
             # some packets have more than 1 sample. Not actually ptp. Revert to non-ptp variant.
             return self._read_nsx_dataheader_spec_v22_30(nsx_nb, filesize=filesize, offset=header_size)
 
-
         # Segment data, at the moment, we segment, where the data has gaps that are longer
         # than twice the sampling period.
         sampling_rate = self._nsx_sampling_frequency[nsx_nb]
@@ -1081,29 +1081,32 @@ def _read_nsx_dataheader_spec_v30_ptp(
         # The raw timestamps are the indices of an ideal clock that ticks at `timestamp_resolution` times per second.
         # We convert this indices to actual timestamps in seconds
         raw_timestamps = struct_arr["timestamps"]
-        timestamps_sampling_rate = self._nsx_basic_header[nsx_nb]["timestamp_resolution"]  # clocks per sec uint64 or uint32
+        timestamps_sampling_rate = self._nsx_basic_header[nsx_nb][
+            "timestamp_resolution"
+        ]  # clocks per sec uint64 or uint32
         timestamps_in_seconds = raw_timestamps / timestamps_sampling_rate
 
         time_differences = np.diff(timestamps_in_seconds)
         gap_indices = np.argwhere(time_differences > segmentation_threshold).flatten()
         segment_starts = np.hstack((0, 1 + gap_indices))
-        
+
         # Report gaps if any are found
         if len(gap_indices) > 0:
             import warnings
+
             threshold_ms = segmentation_threshold * 1000
-            
+
             # Calculate all gap details in vectorized operations
             gap_durations_seconds = time_differences[gap_indices]
             gap_durations_ms = gap_durations_seconds * 1000
             gap_positions_seconds = timestamps_in_seconds[gap_indices] - timestamps_in_seconds[0]
-            
+
             # Build gap detail lines all at once
             gap_detail_lines = [
                 f"| {index:>15,} | {pos:>21.6f} | {dur:>21.3f} |\n"
                 for index, pos, dur in zip(gap_indices, gap_positions_seconds, gap_durations_ms)
             ]
-            
+
             segmentation_report_message = (
                 f"\nFound {len(gap_indices)} gaps for nsx {nsx_nb} where samples are farther apart than {threshold_ms:.3f} ms.\n"
                 f"Data will be segmented at these locations to create {len(segment_starts)} segments.\n\n"
@@ -1112,15 +1115,17 @@ def _read_nsx_dataheader_spec_v30_ptp(
                 "| Sample Index    | Sample at             | Gap Jump              |\n"
                 "|                 | (Seconds)             | (Milliseconds)        |\n"
                 "+-----------------+-----------------------+-----------------------+\n"
-                + ''.join(gap_detail_lines) +
-                "+-----------------+-----------------------+-----------------------+\n"
+                + "".join(gap_detail_lines)
+                + "+-----------------+-----------------------+-----------------------+\n"
             )
             warnings.warn(segmentation_report_message)
-        
+
         # Calculate all segment boundaries and derived values in one operation
         segment_boundaries = list(segment_starts) + [len(struct_arr) - 1]
-        segment_num_data_points = [segment_boundaries[i+1] - segment_boundaries[i] for i in range(len(segment_starts))]
-        
+        segment_num_data_points = [
+            segment_boundaries[i + 1] - segment_boundaries[i] for i in range(len(segment_starts))
+        ]
+
         size_of_data_block = struct_arr.dtype.itemsize
         segment_offsets = [header_size + pos * size_of_data_block for pos in segment_starts]
 
diff --git a/neo/rawio/neuralynxrawio/neuralynxrawio.py b/neo/rawio/neuralynxrawio/neuralynxrawio.py
@@ -55,7 +55,7 @@
 )
 import numpy as np
 import os
-import pathlib
+from pathlib import Path
 import copy
 import warnings
 from collections import namedtuple, OrderedDict
@@ -151,15 +151,15 @@ def __init__(
 
         if filename is not None:
             include_filenames = [filename]
-            warnings.warn("`filename` is deprecated and will be removed. Please use `include_filenames` instead")
+            warnings.warn("`filename` is deprecated and will be removed in version 1.0. Please use `include_filenames` instead")
 
         if exclude_filename is not None:
             if isinstance(exclude_filename, str):
                 exclude_filenames = [exclude_filename]
             else:
                 exclude_filenames = exclude_filename
             warnings.warn(
-                "`exclude_filename` is deprecated and will be removed. Please use `exclude_filenames` instead"
+                "`exclude_filename` is deprecated and will be removed in version 1.0. Please use `exclude_filenames` instead"
             )
 
         if include_filenames is None:
@@ -214,30 +214,43 @@ def _parse_header(self):
         unit_annotations = []
         event_annotations = []
 
-        if self.rawmode == "one-dir":
-            filenames = sorted(os.listdir(self.dirname))
-        else:
-            filenames = self.include_filenames
-
-        filenames = [f for f in filenames if f not in self.exclude_filenames]
-        full_filenames = [os.path.join(self.dirname, f) for f in filenames]
-
-        for filename in full_filenames:
-            if not os.path.isfile(filename):
-                raise ValueError(
-                    f"Provided Filename is not a file: "
-                    f"{filename}. If you want to provide a "
-                    f"directory use the `dirname` keyword"
-                )
+        # 1) Get file paths based on mode and validate existence for multiple-files mode
+        if self.rawmode == "multiple-files":
+            # For multiple-files mode, validate that all explicitly provided files exist
+            file_paths = []
+            for filename in self.include_filenames:
+                full_path = Path(self.dirname) / filename
+                if not full_path.is_file():
+                    raise ValueError(
+                        f"Provided Filename is not a file: "
+                        f"{full_path}. If you want to provide a "
+                        f"directory use the `dirname` keyword"
+                    )
+                file_paths.append(full_path)
+        else:  # one-dir mode
+            # For one-dir mode, get all files from directory
+            dir_path = Path(self.dirname)
+            file_paths = [p for p in dir_path.iterdir() if p.is_file()]
+            file_paths = sorted(file_paths, key=lambda p: p.name)
+
+        # 2) Filter by exclude filenames
+        file_paths = [fp for fp in file_paths if fp.name not in self.exclude_filenames]
+
+        # 3) Filter to keep only files with correct extensions
+        # Note: suffix[1:] removes the leading dot from file extension (e.g., ".ncs" -> "ncs")
+        valid_file_paths = [
+            fp for fp in file_paths
+            if fp.suffix[1:].lower() in self.extensions
+        ]
+
+        # Convert back to strings for backwards compatibility with existing code
+        full_filenames = [str(fp) for fp in valid_file_paths]
 
         stream_props = {}  # {(sampling_rate, n_samples, t_start): {stream_id: [filenames]}
 
         for filename in full_filenames:
             _, ext = os.path.splitext(filename)
-            ext = ext[1:]  # remove dot
-            ext = ext.lower()  # make lower case for comparisons
-            if ext not in self.extensions:
-                continue
+            ext = ext[1:].lower()  # remove dot and make lower case
 
             # Skip Ncs files with only header. Other empty file types
             # will have an empty dataset constructed later.
@@ -574,7 +587,7 @@ def _get_file_map(filename):
         Create memory maps when needed
         see also https://github.com/numpy/numpy/issues/19340
         """
-        filename = pathlib.Path(filename)
+        filename = Path(filename)
         suffix = filename.suffix.lower()[1:]
 
         if suffix == "ncs":
diff --git a/neo/rawio/neuralynxrawio/nlxheader.py b/neo/rawio/neuralynxrawio/nlxheader.py
@@ -55,7 +55,13 @@ def _to_bool(txt):
         ("DspHighCutNumTaps", "", None),
         ("DspHighCutFilterType", "", None),
         ("DspDelayCompensation", "", None),
-        ("DspFilterDelay_µs", "", None),
+        # DspFilterDelay key with flexible µ symbol matching
+        # Different Neuralynx versions encode the µ (micro) symbol differently:
+        # - Some files use single-byte encoding (latin-1): DspFilterDelay_µs (raw bytes: \xb5)
+        # - Other files use UTF-8 encoding: DspFilterDelay_µs (raw bytes: \xc2\xb5)
+        # When UTF-8 encoded µ (\xc2\xb5) is decoded with latin-1, it becomes "Âµ"
+        # This regex matches both variants: "µs" and "Âµs" but normalizes to "DspFilterDelay_µs"
+        (r"DspFilterDelay_[Â]?µs", "DspFilterDelay_µs", None),
         ("DisabledSubChannels", "", None),
         ("WaveformLength", "", int),
         ("AlignmentPt", "", None),
diff --git a/neo/test/rawiotest/test_neuralynxrawio.py b/neo/test/rawiotest/test_neuralynxrawio.py
@@ -31,6 +31,7 @@ class TestNeuralynxRawIO(
         "neuralynx/Cheetah_v5.6.3/original_data",
         "neuralynx/Cheetah_v5.7.4/original_data",
         "neuralynx/Cheetah_v6.3.2/incomplete_blocks",
+        "neuralynx/two_streams_different_header_encoding",
     ]
 
     def test_scan_ncs_files(self):
@@ -175,6 +176,43 @@ def test_exclude_filenames(self):
         self.assertEqual(len(rawio.header["spike_channels"]), 8)
         self.assertEqual(len(rawio.header["event_channels"]), 0)
 
+    def test_directory_in_data_folder(self):
+        """
+        Check that `parse_header` succeeds when the folder passed as `dirname`
+        also contains a subdirectory, and that the counts asserted below hold.
+        """
+        import tempfile
+        import shutil
+
+        # Resolve the local path of the Cheetah v5.6.3 reference recording
+        dname = self.get_local_path("neuralynx/Cheetah_v5.6.3/original_data/")
+
+        # Work on a throwaway copy so the original test data stays untouched
+        with tempfile.TemporaryDirectory() as temp_dir:
+            temp_data_dir = os.path.join(temp_dir, "test_data")
+            shutil.copytree(dname, temp_data_dir)
+
+            # Add a subdirectory (its name contains spaces) inside the copied folder
+            test_subdir = os.path.join(temp_data_dir, "raw fscv data with all recorded ch")
+            os.makedirs(test_subdir, exist_ok=True)
+
+            # Put a plain text file inside it so the subdirectory is not empty
+            with open(os.path.join(test_subdir, "some_file.txt"), "w") as f:
+                f.write("test file content")
+
+            # Parsing must not raise even though `dirname` now holds a directory entry
+            rawio = NeuralynxRawIO(dirname=temp_data_dir)
+            rawio.parse_header()
+
+            # Check the segment, file and channel counts reported after parsing
+            self.assertEqual(rawio._nb_segment, 2)
+            self.assertEqual(len(rawio.ncs_filenames), 2)
+            self.assertEqual(len(rawio.nev_filenames), 1)
+            sigHdrs = rawio.header["signal_channels"]
+            self.assertEqual(sigHdrs.size, 2)
+            self.assertEqual(len(rawio.header["spike_channels"]), 8)
+            self.assertEqual(len(rawio.header["event_channels"]), 2)
+
 
 class TestNcsRecordingType(BaseTestRawIO, unittest.TestCase):
     """