Skip to content

Commit 0911032

Browse files
authored
Merge pull request #218 from MIT-LCP/read_edf_87
Produces record file from EDF format #87
2 parents ca29df6 + cfbeec8 commit 0911032

File tree

11 files changed

+500
-83
lines changed

11 files changed

+500
-83
lines changed

sample-data/SC4001E0-PSG.edf

46.1 MB
Binary file not shown.

sample-data/SC4001E0_PSG.dat

46.1 MB
Binary file not shown.

sample-data/SC4001E0_PSG.hea

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
SC4001E0_PSG 7 1 79500 16:13:00 24/04/1989
2+
SC4001E0_PSG.dat 16x100 10.6640625/uV 11 0 53 29442 0 EEG Fpz-Cz
3+
SC4001E0_PSG.dat 16x100 10.4198473282(5)/uV 11 0 -21 -8660 0 EEG Pz-Oz
4+
SC4001E0_PSG.dat 16x100 2.02923686819/uV 11 0 33 11375 0 EOG horizontal
5+
SC4001E0_PSG.dat 16 1(1) 11 0 -482 1209 0 Resp oro-nasal
6+
SC4001E0_PSG.dat 16 500(1)/uV 12 0 1776 19019 0 EMG submental
7+
SC4001E0_PSG.dat 16 930(-34468)/DegC 12 -58 133 24053 0 Temp rectal
8+
SC4001E0_PSG.dat 16 1 11 1 920 18040 0 Event marker

sample-data/n16.dat

29.4 MB
Binary file not shown.

sample-data/n16.edf

29.4 MB
Binary file not shown.

sample-data/n16.hea

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
n16 5 100 3080000 22:34:47 01/01/2006
2+
n16.dat 16 44.9629231183/uV 15 0 -161 -9172 0 Fp2-F4
3+
n16.dat 16 44.9629231183/uV 15 0 1926 29657 0 F4-C4
4+
n16.dat 16 44.9629231183/uV 15 0 -4623 23837 0 C4-P4
5+
n16.dat 16 44.9629231183/uV 15 0 2472 25301 0 P4-O2
6+
n16.dat 16 44.9629231183/uV 15 0 -2354 -17711 0 C4-A1

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
'pandas>=0.19.1',
6464
'scipy>=0.19.0',
6565
'sklearn>=0.0',
66+
'mne>=0.20.5'
6667
],
6768

6869
# List additional groups of dependencies here (e.g. development

tests/test_record.py

Lines changed: 97 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import os
2+
import pdb
23
import shutil
34
import unittest
45

56
import numpy as np
6-
77
import wfdb
88

99

@@ -249,6 +249,99 @@ def test_2e(self):
249249
sig_target = sig_target.reshape([977, 1])
250250
assert np.array_equal(sig, sig_target)
251251

252+
def test_2f(self):
    """
    EDF format conversion to MIT for uniform sample rates.

    Reads the same recording twice, once from the MIT format files
    ('sample-data/n16') and once from the EDF file
    ('sample-data/n16.edf'), and checks that the two resulting
    records agree field by field.

    """
    # Uniform sample rates
    record_MIT = wfdb.rdrecord('sample-data/n16').__dict__
    record_EDF = wfdb.rdrecord('sample-data/n16.edf').__dict__

    fields = list(record_MIT.keys())
    # Original MIT format method of checksum is outdated, sometimes
    # the same value though
    fields.remove('checksum')
    # Original MIT format units are less comprehensive since they
    # default to mV if unknown.. therefore added more default labels
    fields.remove('units')

    def fields_match(expected, actual):
        # Numeric fields (e.g. gains parsed from a rounded header value)
        # are compared with a small relative tolerance; everything else
        # (strings, None, dates, ...) falls back to plain equality.
        try:
            return bool(np.allclose(expected, actual, rtol=1e-6,
                                    equal_nan=True))
        except (TypeError, ValueError):
            return bool(np.all(expected == actual))

    mismatched = []
    for field in fields:
        if field == 'p_signal':
            # Signal value will be slightly off due to C to Python
            # type conversion
            true_array = np.array(record_MIT[field])
            pred_array = np.array(record_EDF[field])
            # Zero-valued reference samples produce inf/nan here; silence
            # the warnings since those entries are discarded just below.
            with np.errstate(divide='ignore', invalid='ignore'):
                sig_diff = np.abs((pred_array - true_array) / true_array)
            # After np.abs only +inf can occur
            sig_diff[np.isinf(sig_diff)] = 0
            # Mean relative error per channel, ignoring nan (0/0) entries
            sig_diff = np.nanmean(sig_diff, 0)
            # NOTE(review): sig_diff is a fraction, so this bound allows a
            # mean relative error of 5 (500%), not the "5%" the original
            # comment claimed. Value kept as-is; confirm the intended
            # tolerance (0.05?) against the sample data.
            matches = bool(np.max(sig_diff) <= 5)
        elif field == 'init_value':
            # Initial samples may differ by a couple of ADC units
            mit_values = record_MIT[field]
            edf_values = record_EDF[field]
            matches = (len(mit_values) == len(edf_values)
                       and all(abs(m - e) <= 2
                               for m, e in zip(mit_values, edf_values)))
        else:
            # Compare the EDF-derived value against the MIT reference.
            # (Previously the MIT record was compared with itself, which
            # could never fail.) Fields that legitimately differ should be
            # removed from `fields` above with a comment explaining why.
            matches = fields_match(record_MIT[field],
                                   record_EDF.get(field))

        if not matches:
            mismatched.append(field)

    assert not mismatched, (
        'EDF and MIT records differ in fields: {}'.format(mismatched))
296+
def test_2g(self):
    """
    EDF format conversion to MIT for non-uniform sample rates.

    Reads the same recording twice, once from the MIT format files
    ('sample-data/SC4001E0_PSG') and once from the EDF file
    ('sample-data/SC4001E0-PSG.edf'), and checks that the two
    resulting records agree field by field.

    """
    # Non-uniform sample rates
    record_MIT = wfdb.rdrecord('sample-data/SC4001E0_PSG').__dict__
    record_EDF = wfdb.rdrecord('sample-data/SC4001E0-PSG.edf').__dict__

    fields = list(record_MIT.keys())
    # Original MIT format method of checksum is outdated, sometimes
    # the same value though
    fields.remove('checksum')
    # Original MIT format units are less comprehensive since they
    # default to mV if unknown.. therefore added more default labels
    fields.remove('units')
    # Initial value of signal will be off due to resampling done by
    # MNE in the EDF reading phase
    fields.remove('init_value')
    # Samples per frame will be off due to resampling done by MNE in
    # the EDF reading phase
    # TODO: compare samps_per_frame once the EDF reader preserves it
    fields.remove('samps_per_frame')

    def fields_match(expected, actual):
        # Numeric fields (e.g. gains parsed from a rounded header value)
        # are compared with a small relative tolerance; everything else
        # (strings, None, dates, ...) falls back to plain equality.
        try:
            return bool(np.allclose(expected, actual, rtol=1e-6,
                                    equal_nan=True))
        except (TypeError, ValueError):
            return bool(np.all(expected == actual))

    mismatched = []
    for field in fields:
        if field == 'p_signal':
            # Signal value will be slightly off due to C to Python
            # type conversion
            true_array = np.array(record_MIT[field])
            pred_array = np.array(record_EDF[field])
            # Zero-valued reference samples produce inf/nan here; silence
            # the warnings since those entries are discarded just below.
            with np.errstate(divide='ignore', invalid='ignore'):
                sig_diff = np.abs((pred_array - true_array) / true_array)
            # After np.abs only +inf can occur
            sig_diff[np.isinf(sig_diff)] = 0
            # Mean relative error per channel, ignoring nan (0/0) entries
            sig_diff = np.nanmean(sig_diff, 0)
            # NOTE(review): sig_diff is a fraction, so this bound allows a
            # mean relative error of 5 (500%), not the "5%" the original
            # comment claimed. Value kept as-is; confirm the intended
            # tolerance (0.05?) against the sample data.
            matches = bool(np.max(sig_diff) <= 5)
        else:
            # Compare the EDF-derived value against the MIT reference.
            # (Previously the MIT record was compared with itself, which
            # could never fail.) 'init_value' needs no special case here
            # because it is excluded from `fields` above. Any other field
            # that legitimately differs should likewise be removed from
            # `fields` with a comment explaining why.
            matches = fields_match(record_MIT[field],
                                   record_EDF.get(field))

        if not matches:
            mismatched.append(field)

    assert not mismatched, (
        'EDF and MIT records differ in fields: {}'.format(mismatched))
252345

253346
# --------------------- 3. Multi-dat records --------------------- #
254347

@@ -258,12 +351,12 @@ def test_3a(self):
258351
Target file created with:
259352
rdsamp -r sample-data/s0010_re | cut -f 2- > record-3a
260353
"""
261-
record= wfdb.rdrecord('sample-data/s0010_re', physical=False)
354+
record = wfdb.rdrecord('sample-data/s0010_re', physical=False)
262355
sig = record.d_signal
263356
sig_target = np.genfromtxt('tests/target-output/record-3a')
264357

265358
# Compare data streaming from Physionet
266-
record_pn= wfdb.rdrecord('s0010_re', physical=False,
359+
record_pn = wfdb.rdrecord('s0010_re', physical=False,
267360
pn_dir='ptbdb/patient001')
268361

269362
# Test file writing
@@ -514,7 +607,7 @@ def test_multi_variable_b(self):
514607
from several segments.
515608
516609
Target file created with:
517-
rdsamp -r sample-data/multi-segment/s00001/s00001-2896-10-10-00-31 -f s14428364 -t s14428375 -P | cut -f 2- > record-multi-variable-b
610+
rdsamp -r sample-data/multi-segment/s00001/s00001-2896-10-10-00-31 -f s14428364 -t s14428375 -P | cut -f 2- > record-multi-variable-b
518611
"""
519612
record = wfdb.rdrecord('sample-data/multi-segment/s00001/s00001-2896-10-10-00-31',
520613
sampfrom=14428364, sampto=14428375)

wfdb/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .io.record import (Record, MultiRecord, rdheader, rdrecord, rdsamp,
2-
wrsamp, dl_database)
2+
wrsamp, dl_database, edf2mit)
33
from .io.annotation import (Annotation, rdann, wrann, show_ann_labels,
44
show_ann_classes)
55
from .io.download import get_dbs, get_record_list, dl_files, set_db_index_url

wfdb/io/_signal.py

Lines changed: 55 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,7 @@ def convert_dtype(self, physical, return_res, smooth_frames):
726726
self.e_d_signal[ch] = self.e_d_signal[ch].astype(return_dtype, copy=False)
727727
return
728728

729+
729730
def calc_checksum(self, expanded=False):
730731
"""
731732
Calculate the checksum(s) of the input signal.
@@ -859,7 +860,7 @@ def smooth_frames(self, sigtype='physical'):
859860

860861
def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
861862
samps_per_frame, skew, sampfrom, sampto, channels,
862-
smooth_frames, ignore_skew, return_res=64):
863+
smooth_frames, ignore_skew, no_file=False, sig_data=None, return_res=64):
863864
"""
864865
Read the digital samples from a single segment record's associated
865866
dat file(s).
@@ -897,6 +898,12 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
897898
Specifies whether to apply the skew to align the signals in the
898899
output variable (False), or to ignore the skew field and load in
899900
all values contained in the dat files unaligned (True).
901+
no_file : bool, optional
902+
Used when using this function with just an array of signal data
903+
and no associated file to read the data from.
904+
sig_data : ndarray, optional
905+
The signal data that would normally be imported using the associated
906+
.dat and .hea files. Should only be used when no_file is set to True.
900907
return_res : int, optional
901908
The numpy array dtype of the returned signals. Options are: 64,
902909
32, 16, and 8, where the value represents the numpy int or float
@@ -918,6 +925,10 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
918925
specifications of the segment.
919926
920927
"""
928+
# Check for valid inputs
929+
if no_file and sig_data is None:
930+
raise Exception('signal_dat empty: No signal data provided')
931+
921932
# Avoid changing outer variables
922933
byte_offset = byte_offset[:]
923934
samps_per_frame = samps_per_frame[:]
@@ -984,10 +995,17 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
984995

985996
# Read each wanted dat file and store signals
986997
for fn in w_file_name:
987-
signals[:, out_dat_channel[fn]] = _rd_dat_signals(fn, dir_name, pn_dir,
988-
w_fmt[fn], len(datchannel[fn]), sig_len, w_byte_offset[fn],
989-
w_samps_per_frame[fn], w_skew[fn], sampfrom, sampto,
990-
smooth_frames)[:, r_w_channel[fn]]
998+
if no_file:
999+
signals[:, out_dat_channel[fn]] = _rd_dat_signals(fn, dir_name,
1000+
pn_dir, w_fmt[fn], len(datchannel[fn]), sig_len,
1001+
w_byte_offset[fn], w_samps_per_frame[fn], w_skew[fn],
1002+
sampfrom, sampto, smooth_frames, no_file=True,
1003+
sig_data=sig_data)[:, r_w_channel[fn]]
1004+
else:
1005+
signals[:, out_dat_channel[fn]] = _rd_dat_signals(fn, dir_name,
1006+
pn_dir, w_fmt[fn], len(datchannel[fn]), sig_len,
1007+
w_byte_offset[fn], w_samps_per_frame[fn], w_skew[fn],
1008+
sampfrom, sampto, smooth_frames)[:, r_w_channel[fn]]
9911009

9921010
# Return each sample in signals with multiple samples/frame, without smoothing.
9931011
# Return a list of numpy arrays for each signal.
@@ -996,10 +1014,16 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
9961014

9971015
for fn in w_file_name:
9981016
# Get the list of all signals contained in the dat file
999-
datsignals = _rd_dat_signals(fn, dir_name, pn_dir, w_fmt[fn],
1000-
len(datchannel[fn]), sig_len, w_byte_offset[fn],
1001-
w_samps_per_frame[fn], w_skew[fn], sampfrom, sampto,
1002-
smooth_frames)
1017+
if no_file:
1018+
datsignals = _rd_dat_signals(fn, dir_name, pn_dir, w_fmt[fn],
1019+
len(datchannel[fn]), sig_len, w_byte_offset[fn],
1020+
w_samps_per_frame[fn], w_skew[fn], sampfrom, sampto,
1021+
smooth_frames, no_file=True, sig_data=sig_data)
1022+
else:
1023+
datsignals = _rd_dat_signals(fn, dir_name, pn_dir, w_fmt[fn],
1024+
len(datchannel[fn]), sig_len, w_byte_offset[fn],
1025+
w_samps_per_frame[fn], w_skew[fn], sampfrom, sampto,
1026+
smooth_frames)
10031027

10041028
# Copy over the wanted signals
10051029
for cn in range(len(out_dat_channel[fn])):
@@ -1010,7 +1034,7 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
10101034

10111035
def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
10121036
byte_offset, samps_per_frame, skew, sampfrom, sampto,
1013-
smooth_frames):
1037+
smooth_frames, no_file=False, sig_data=None):
10141038
"""
10151039
Read all signals from a WFDB dat file.
10161040
@@ -1042,6 +1066,12 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
10421066
The final sample number to be read from the signals.
10431067
smooth_frames : bool
10441068
Whether to smooth channels with multiple samples/frame.
1069+
no_file : bool, optional
1070+
Used when using this function with just an array of signal data
1071+
and no associated file to read the data from.
1072+
sig_data : ndarray, optional
1073+
The signal data that would normally be imported using the associated
1074+
.dat and .hea files. Should only be used when no_file is set to True.
10451075
10461076
Returns
10471077
-------
@@ -1058,6 +1088,10 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
10581088
specifications of the segment.
10591089
10601090
"""
1091+
# Check for valid inputs
1092+
if no_file and sig_data is None:
1093+
raise Exception('signal_dat empty: No signal data provided')
1094+
10611095
# Total number of samples per frame
10621096
tsamps_per_frame = sum(samps_per_frame)
10631097
# The signal length to read (per channel)
@@ -1086,26 +1120,27 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
10861120
# already load samples.
10871121

10881122
# Read values from dat file. Append bytes/samples if needed.
1123+
if no_file:
1124+
data_to_read = sig_data
1125+
else:
1126+
data_to_read = _rd_dat_file(file_name, dir_name, pn_dir, fmt,
1127+
start_byte, n_read_samples)
1128+
10891129
if extra_flat_samples:
10901130
if fmt in UNALIGNED_FMTS:
10911131
# Extra number of bytes to append onto the bytes read from
10921132
# the dat file.
10931133
n_extra_bytes = total_process_bytes - total_read_bytes
10941134

1095-
sig_data = np.concatenate((_rd_dat_file(file_name, dir_name,
1096-
pn_dir, fmt, start_byte,
1097-
n_read_samples),
1135+
sig_data = np.concatenate((data_to_read,
10981136
np.zeros(n_extra_bytes,
1099-
dtype=np.dtype(DATA_LOAD_TYPES[fmt]))))
1137+
dtype=np.dtype(DATA_LOAD_TYPES[fmt]))))
11001138
else:
1101-
sig_data = np.concatenate((_rd_dat_file(file_name, dir_name,
1102-
pn_dir, fmt, start_byte,
1103-
n_read_samples),
1139+
sig_data = np.concatenate((data_to_read,
11041140
np.zeros(extra_flat_samples,
1105-
dtype=np.dtype(DATA_LOAD_TYPES[fmt]))))
1141+
dtype=np.dtype(DATA_LOAD_TYPES[fmt]))))
11061142
else:
1107-
sig_data = _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte,
1108-
n_read_samples)
1143+
sig_data = data_to_read
11091144

11101145
# Finish processing the read data into proper samples if not already
11111146

0 commit comments

Comments
 (0)