Skip to content

Commit b6c326c

Browse files
authored
Move content to new header.py. (#400)
* Change time/date params of csv_to_wfdb
1 parent 002d329 commit b6c326c

File tree

4 files changed

+137
-109
lines changed

4 files changed

+137
-109
lines changed

wfdb/io/_header.py

Lines changed: 4 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
import datetime
2-
import re
32
from typing import Collection, List, Tuple
43

54
import numpy as np
65
import pandas as pd
76

87
from wfdb.io import _signal
98
from wfdb.io import util
10-
9+
from wfdb.io.header import HeaderSyntaxError, rx_record, rx_segment, rx_signal
1110

1211
"""
1312
Notes
@@ -32,12 +31,6 @@
3231
so that the user doesn't need to. But when reading, it should
3332
be clear that the fields are missing.
3433
35-
If all of the fields were filled out in a WFDB header file, they would appear
36-
in this order with these separators:
37-
38-
RECORD_NAME/NUM_SEG NUM_SIG SAMP_FREQ/COUNT_FREQ(BASE_COUNT_VAL) SAMPS_PER_SIG BASE_TIME BASE_DATE
39-
FILE_NAME FORMATxSAMP_PER_FRAME:SKEW+BYTE_OFFSET ADC_GAIN(BASELINE)/UNITS ADC_RES ADC_ZERO CHECKSUM BLOCK_SIZE DESCRIPTION
40-
4134
"""
4235
int_types = (int, np.int64, np.int32, np.int16, np.int8)
4336
float_types = (float, np.float64, np.float32) + int_types
@@ -135,53 +128,6 @@
135128
# Specifications of all WFDB header fields, except for comments
136129
FIELD_SPECS = pd.concat((RECORD_SPECS, SIGNAL_SPECS, SEGMENT_SPECS))
137130

138-
# Regexp objects for reading headers
139-
# Record line
140-
_rx_record = re.compile(
141-
r"""
142-
[ \t]* (?P<record_name>[-\w]+)
143-
/?(?P<n_seg>\d*)
144-
[ \t]+ (?P<n_sig>\d+)
145-
[ \t]* (?P<fs>\d*\.?\d*)
146-
/*(?P<counter_freq>-?\d*\.?\d*)
147-
\(?(?P<base_counter>-?\d*\.?\d*)\)?
148-
[ \t]* (?P<sig_len>\d*)
149-
[ \t]* (?P<base_time>\d{,2}:?\d{,2}:?\d{,2}\.?\d{,6})
150-
[ \t]* (?P<base_date>\d{,2}/?\d{,2}/?\d{,4})
151-
""",
152-
re.VERBOSE,
153-
)
154-
155-
# Signal line
156-
_rx_signal = re.compile(
157-
r"""
158-
[ \t]* (?P<file_name>~?[-\w]*\.?[\w]*)
159-
[ \t]+ (?P<fmt>\d+)
160-
x?(?P<samps_per_frame>\d*)
161-
:?(?P<skew>\d*)
162-
\+?(?P<byte_offset>\d*)
163-
[ \t]* (?P<adc_gain>-?\d*\.?\d*e?[\+-]?\d*)
164-
\(?(?P<baseline>-?\d*)\)?
165-
/?(?P<units>[\w\^\-\?%\/]*)
166-
[ \t]* (?P<adc_res>\d*)
167-
[ \t]* (?P<adc_zero>-?\d*)
168-
[ \t]* (?P<init_value>-?\d*)
169-
[ \t]* (?P<checksum>-?\d*)
170-
[ \t]* (?P<block_size>\d*)
171-
[ \t]* (?P<sig_name>[\S]?[^\t\n\r\f\v]*)
172-
""",
173-
re.VERBOSE,
174-
)
175-
176-
# Segment line
177-
_rx_segment = re.compile(
178-
r"""
179-
[ \t]* (?P<seg_name>[-\w]*~?)
180-
[ \t]+ (?P<seg_len>\d+)
181-
""",
182-
re.VERBOSE,
183-
)
184-
185131

186132
class BaseHeaderMixin(object):
187133
"""
@@ -1013,37 +959,6 @@ def wfdb_strptime(time_string: str) -> datetime.time:
1013959
return datetime.datetime.strptime(time_string, time_fmt).time()
1014960

1015961

1016-
def parse_header_content(
1017-
header_content: str,
1018-
) -> Tuple[List[str], List[str]]:
1019-
"""
1020-
Parse the text of a header file.
1021-
1022-
Parameters
1023-
----------
1024-
header_content: str
1025-
The string content of the full header file
1026-
1027-
Returns
1028-
-------
1029-
header_lines : List[str]
1030-
A list of all the non-comment lines
1031-
comment_lines : List[str]
1032-
A list of all the comment lines
1033-
"""
1034-
header_lines, comment_lines = [], []
1035-
for line in header_content.splitlines():
1036-
line = line.strip()
1037-
# Comment line
1038-
if line.startswith("#"):
1039-
comment_lines.append(line)
1040-
# Non-empty non-comment line = header line.
1041-
elif line:
1042-
header_lines.append(line)
1043-
1044-
return header_lines, comment_lines
1045-
1046-
1047962
def _parse_record_line(record_line: str) -> dict:
1048963
"""
1049964
Extract fields from a record line string into a dictionary.
@@ -1063,7 +978,7 @@ def _parse_record_line(record_line: str) -> dict:
1063978
record_fields = {}
1064979

1065980
# Read string fields from record line
1066-
match = _rx_record.match(record_line)
981+
match = rx_record.match(record_line)
1067982
if match is None:
1068983
raise HeaderSyntaxError("invalid syntax in record line")
1069984
(
@@ -1139,7 +1054,7 @@ def _parse_signal_lines(signal_lines):
11391054

11401055
# Read string fields from signal line
11411056
for ch in range(n_sig):
1142-
match = _rx_signal.match(signal_lines[ch])
1057+
match = rx_signal.match(signal_lines[ch])
11431058
if match is None:
11441059
raise HeaderSyntaxError("invalid syntax in signal line")
11451060
(
@@ -1213,7 +1128,7 @@ def _read_segment_lines(segment_lines):
12131128

12141129
# Read string fields from segment line
12151130
for i in range(len(segment_lines)):
1216-
match = _rx_segment.match(segment_lines[i])
1131+
match = rx_segment.match(segment_lines[i])
12171132
if match is None:
12181133
raise HeaderSyntaxError("invalid syntax in segment line")
12191134
(
@@ -1226,7 +1141,3 @@ def _read_segment_lines(segment_lines):
12261141
segment_fields["seg_len"][i] = int(segment_fields["seg_len"][i])
12271142

12281143
return segment_fields
1229-
1230-
1231-
class HeaderSyntaxError(ValueError):
1232-
"""Invalid syntax found in a WFDB header file."""

wfdb/io/convert/csv.py

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
1-
import datetime
21
import os
32

43
import numpy as np
54
import pandas as pd
65

7-
from wfdb.io import _header
86
from wfdb.io.annotation import format_ann_from_df, Annotation, wrann
97
from wfdb.io.record import Record, wrsamp
108

@@ -121,16 +119,13 @@ def csv_to_wfdb(
121119
The base counter value is a floating-point number that specifies the counter
122120
value corresponding to sample 0. If absent, the base counter value is
123121
taken to be 0.
124-
base_time : str, optional
122+
base_time : datetime.time, optional
125123
This field can be present only if the number of samples is also present.
126124
It gives the time of day that corresponds to the beginning of the
127-
record, in 'HH:MM:SS' format (using a 24-hour clock; thus '13:05:00', or
128-
'13:5:0', represent 1:05 pm). If this field is absent, the time-conversion
129-
functions assume a value of '0:0:0', corresponding to midnight.
130-
base_date : str, optional
125+
record.
126+
base_date : datetime.date, optional
131127
This field can be present only if the base time is also present. It contains
132-
the date that corresponds to the beginning of the record, in 'DD/MM/YYYY'
133-
format (e.g., '25/4/1989' is '25 April 1989').
128+
the date that corresponds to the beginning of the record.
134129
comments : list, optional
135130
A list of string comments to be written to the header file. Each string
136131
entry represents a new line to be appended to the bottom of the header
@@ -416,12 +411,6 @@ def csv_to_wfdb(
416411
if verbose:
417412
print("Signal block size: {}".format(block_size))
418413

419-
# Change the dates and times into `datetime` objects
420-
if base_time:
421-
base_time = _header.wfdb_strptime(base_time)
422-
if base_date:
423-
base_date = datetime.datetime.strptime(base_date, "%d/%m/%Y").date()
424-
425414
# Convert array to floating point
426415
p_signal = p_signal.astype("float64")
427416

wfdb/io/header.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
"""
2+
Module for parsing header files.
3+
4+
This module will eventually replace _header.py
5+
6+
"""
7+
import datetime
8+
import re
9+
from typing import List, Tuple
10+
11+
12+
class HeaderSyntaxError(ValueError):
13+
"""Invalid syntax found in a WFDB header file."""
14+
15+
16+
# Record line pattern. Format:
17+
# RECORD_NAME/NUM_SEG NUM_SIG SAMP_FREQ/COUNT_FREQ(BASE_COUNT_VAL) SAMPS_PER_SIG BASE_TIME BASE_DATE
18+
rx_record = re.compile(
19+
r"""
20+
[ \t]* (?P<record_name>[-\w]+)
21+
/?(?P<n_seg>\d*)
22+
[ \t]+ (?P<n_sig>\d+)
23+
[ \t]* (?P<fs>\d*\.?\d*)
24+
/*(?P<counter_freq>-?\d*\.?\d*)
25+
\(?(?P<base_counter>-?\d*\.?\d*)\)?
26+
[ \t]* (?P<sig_len>\d*)
27+
[ \t]* (?P<base_time>\d{,2}:?\d{,2}:?\d{,2}\.?\d{,6})
28+
[ \t]* (?P<base_date>\d{,2}/?\d{,2}/?\d{,4})
29+
""",
30+
re.VERBOSE,
31+
)
32+
33+
# Signal line pattern. Format:
34+
# FILE_NAME FORMATxSAMP_PER_FRAME:SKEW+BYTE_OFFSET ADC_GAIN(BASELINE)/UNITS ADC_RES ADC_ZERO INIT_VALUE CHECKSUM BLOCK_SIZE DESCRIPTION
35+
rx_signal = re.compile(
36+
r"""
37+
[ \t]* (?P<file_name>~?[-\w]*\.?[\w]*)
38+
[ \t]+ (?P<fmt>\d+)
39+
x?(?P<samps_per_frame>\d*)
40+
:?(?P<skew>\d*)
41+
\+?(?P<byte_offset>\d*)
42+
[ \t]* (?P<adc_gain>-?\d*\.?\d*e?[\+-]?\d*)
43+
\(?(?P<baseline>-?\d*)\)?
44+
/?(?P<units>[\w\^\-\?%\/]*)
45+
[ \t]* (?P<adc_res>\d*)
46+
[ \t]* (?P<adc_zero>-?\d*)
47+
[ \t]* (?P<init_value>-?\d*)
48+
[ \t]* (?P<checksum>-?\d*)
49+
[ \t]* (?P<block_size>\d*)
50+
[ \t]* (?P<sig_name>[\S]?[^\t\n\r\f\v]*)
51+
""",
52+
re.VERBOSE,
53+
)
54+
55+
# Segment line pattern. Format: SEG_NAME SEG_LEN
56+
rx_segment = re.compile(
57+
r"""
58+
[ \t]* (?P<seg_name>[-\w]*~?)
59+
[ \t]+ (?P<seg_len>\d+)
60+
""",
61+
re.VERBOSE,
62+
)
63+
64+
65+
def wfdb_strptime(time_string: str) -> datetime.time:
    """
    Given a time string in an acceptable WFDB format, return
    a datetime.time object.

    Valid formats: SS, MM:SS, HH:MM:SS, each with or without a
    fractional-seconds part (e.g. '13:05:00', '13:5:0', '5:30.25').

    Parameters
    ----------
    time_string : str
        The time to be converted to a datetime.time object.

    Returns
    -------
    datetime.time object
        The time converted from str format.

    Raises
    ------
    ValueError
        If `time_string` contains more than two colons, or if its
        fields cannot be parsed by `datetime.datetime.strptime`.

    """
    # Format to use, indexed by the number of colons in the string.
    colon_formats = ("%S", "%M:%S", "%H:%M:%S")

    n_colons = time_string.count(":")
    if n_colons >= len(colon_formats):
        # Without this guard no format would be selected and the function
        # would fail with an UnboundLocalError instead of a parse error.
        raise ValueError(
            "invalid WFDB time string {!r}: expected SS, MM:SS or "
            "HH:MM:SS".format(time_string)
        )
    time_fmt = colon_formats[n_colons]

    # A decimal point means fractional seconds follow the seconds field.
    if "." in time_string:
        time_fmt += ".%f"

    return datetime.datetime.strptime(time_string, time_fmt).time()
96+
97+
98+
def parse_header_content(
99+
header_content: str,
100+
) -> Tuple[List[str], List[str]]:
101+
"""
102+
Parse the text of a header file.
103+
104+
Parameters
105+
----------
106+
header_content : str
107+
The string content of the full header file
108+
109+
Returns
110+
-------
111+
header_lines : List[str]
112+
A list of all the non-comment lines
113+
comment_lines : List[str]
114+
A list of all the comment lines
115+
116+
"""
117+
header_lines, comment_lines = [], []
118+
for line in header_content.splitlines():
119+
line = line.strip()
120+
# Comment line
121+
if line.startswith("#"):
122+
comment_lines.append(line)
123+
# Non-empty non-comment line = header line.
124+
elif line:
125+
header_lines.append(line)
126+
127+
return header_lines, comment_lines

wfdb/io/record.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from wfdb.io import _signal
1212
from wfdb.io import _url
1313
from wfdb.io import download
14+
from wfdb.io import header
1415
from wfdb.io import util
1516

1617

@@ -1840,7 +1841,7 @@ def rdheader(record_name, pn_dir=None, rd_segments=False):
18401841
header_content = download._stream_header(file_name, pn_dir)
18411842

18421843
# Separate comment and non-comment lines
1843-
header_lines, comment_lines = _header.parse_header_content(header_content)
1844+
header_lines, comment_lines = header.parse_header_content(header_content)
18441845

18451846
# Get fields from record line
18461847
record_fields = _header._parse_record_line(header_lines[0])

0 commit comments

Comments
 (0)