Skip to content

Commit 1b3bf77

Browse files
authored
Fix reading of UTF8 annotations (#276)
* make utf8 default reading format * make utf8 default reading format * Add test for reading UTF8 annotations
1 parent 8b3c29f commit 1b3bf77

File tree

3 files changed

+16
-2
lines changed

3 files changed

+16
-2
lines changed

pyedflib/edfreader.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,12 +167,17 @@ def _get_float(self, v: np.ndarray) -> np.ndarray:
167167
return result
168168

169169
def _convert_string(self, s: Union[bytes, str]) -> str:
    """
    Convert a value read from the file into a ``str``.

    ``bytes`` are decoded as UTF-8 first. If they are not valid UTF-8, a
    warning is emitted and they are decoded as Latin-1 instead, with
    ``errors="replace"``. A ``str`` is returned unchanged.

    Parameters
    ----------
    s : bytes or str
        The value to convert.

    Returns
    -------
    str
        The decoded (or unchanged) string.

    Raises
    ------
    TypeError
        If `s` is neither ``bytes`` nor ``str``.
    """
    if isinstance(s, str):
        return s
    if isinstance(s, bytes):
        try:
            return s.decode("utf_8", "strict")  # Prioritize UTF-8
        except UnicodeDecodeError:
            # The f prefix is required: without it the message shows the
            # literal text "{s=}" instead of the bytes that failed to decode.
            warnings.warn(f"Could not decode string '{s=}', using fallback latin encoding")
            return s.decode("latin1", errors="replace")  # Fallback
    # Same here: interpolate the actual type rather than printing
    # the placeholder text verbatim.
    raise TypeError(f'unknown string type: {type(s)=}')
176181

177182
def getHeader(self) -> Dict[str, Union[str, datetime]]:
178183
"""

pyedflib/tests/data/test_utf8.edf

211 KB
Binary file not shown.

pyedflib/tests/test_edfreader.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ class TestEdfReader(unittest.TestCase):
2222
def setUpClass(cls):
2323
# data_dir = os.path.join(os.getcwd(), 'data')
2424
data_dir = os.path.join(os.path.dirname(__file__), 'data')
25+
cls.edf_utf8 = os.path.join(data_dir, 'test_utf8.edf')
2526
cls.edf_data_file = os.path.join(data_dir, 'test_generator.edf')
2627
cls.bdf_data_file = os.path.join(data_dir, 'test_generator.bdf')
2728
cls.bdf_data_file_datarec_2 = os.path.join(data_dir, 'test_generator_datarec_generator_2.bdf')
@@ -393,6 +394,14 @@ def test_EdfReader_Legacy_Header_Info(self):
393394
for attr_name, expected_value in expected_header.items():
394395
self.assertEqual(getattr(f, attr_name), expected_value)
395396

397+
398+
def test_read_annotations_utf8(self):
    """Annotation text stored as UTF-8 in an existing file is read back intact."""
    with pyedflib.EdfReader(self.edf_utf8) as reader:
        # Only the annotation texts matter here; onsets and durations are ignored.
        _, _, texts = reader.readAnnotations()
        third_annotation = texts[2]
        self.assertEqual(third_annotation, '中文测试八个字')
403+
404+
396405
if __name__ == '__main__':
397406
# run_module_suite(argv=sys.argv)
398407
unittest.main()

0 commit comments

Comments
 (0)