33import os
44import sys
55import requests
6- import multiprocessing
7- from . import records
6+ from IPython .display import display
87
98# Read a header file from physiobank
109def streamheader (recordname , pbdir ):
@@ -88,87 +87,109 @@ def streamannotation(filename, pbdir):
8887 return annbytes
8988
9089
91- # Download all the WFDB files from a physiobank database
92- def dldatabase (pbdb , dlbasedir , keepsubdirs = True , overwrite = False ):
# Return a list of all the physiobank databases available
def getdblist():
    """
    Return a list of all the physiobank databases available.

    Downloads the 'DBS' index file located under `dbindexurl` and splits
    each of its lines into tab-separated fields. Runs of two or more
    consecutive tabs are collapsed so they count as a single separator.

    Returns:
    - dblist: a list with one entry per line of the index file, each
      entry being the list of tab-separated fields found on that line.

    Raises:
    - requests.HTTPError if the index file cannot be retrieved.

    Usage:
    dblist = getdblist()
    """
    # Imported locally so the function does not depend on module-level
    # imports of these names.
    import posixpath
    import re

    # URLs always use forward slashes, whatever the local OS separator is
    url = posixpath.join(dbindexurl, 'DBS')
    r = requests.get(url)
    # Fail loudly instead of parsing an HTTP error page as a database list
    r.raise_for_status()

    multitab = re.compile(r'\t{2,}')
    lines = r.content.decode('ascii').splitlines()
    dblist = [multitab.sub('\t', line).split('\t') for line in lines]

    return dblist
103+
104+
93105
# Download specific files from a physiobank database
def dldatabasefiles(pbdb, dlbasedir, files, keepsubdirs=True, overwrite=False):
    """
    Download specific files from a physiobank database.

    Input arguments:
    - pbdb: the database directory name, appended to `dbindexurl` to form
      the database url.
    - dlbasedir: the local base directory to download the files into.
    - files: the files to download, as paths relative to the database's
      home directory.
    - keepsubdirs (default=True): whether each file is stored locally under
      its relative subdirectory, rather than directly in dlbasedir.
    - overwrite (default=False): forwarded to dlpbfile, which uses it to
      decide whether existing local files are downloaded again.

    Raises:
    - requests.HTTPError if the database url is not valid.
    """
    # Imported locally so the function does not depend on module-level
    # imports of these names.
    import multiprocessing
    import posixpath

    # Full url physiobank database. URLs always use forward slashes.
    dburl = posixpath.join(dbindexurl, pbdb)
    # Check if the database is valid
    r = requests.get(dburl)
    r.raise_for_status()

    # Construct the inputs of dlpbfile, one tuple per file to download
    dlinputs = []
    for filepath in files:
        subdir, basefile = os.path.split(filepath)
        dlinputs.append((basefile, subdir, pbdb, dlbasedir, keepsubdirs, overwrite))

    # Make any required local directories
    makelocaldirs(dlbasedir, dlinputs, keepsubdirs)

    print('Downloading files...')
    # Create multiple processes to download files.
    # Limit to 2 connections to avoid overloading the server
    pool = multiprocessing.Pool(processes=2)
    try:
        pool.map(dlpbfile, dlinputs)
    finally:
        # Always release the worker processes, even if a download fails
        pool.close()
        pool.join()
    print('Finished downloading files')

    return
164128
165129
166- # Download selected WFDB files from a physiobank database
167- # def dldatabaserecords(pbdb, dlbasedir, keepsubirs = True, overwrite = False):
130+ # ---- Helper functions for downloading physiobank files ------- #
131+
def getrecordlist(dburl, records):
    """
    Get the list of records to download from a physiobank database.

    Input arguments:
    - dburl: the full url of the physiobank database.
    - records: either the string 'all', in which case the record names are
      read from the database's RECORDS file, or the record names
      themselves. A single record name given as a string is wrapped in a
      list.

    Returns:
    - recordlist: the record names. With records='all', one string per
      line of the RECORDS file. A single name comes back as a one-item
      list. Any other input is returned unchanged.

    Exits the program if records is 'all' and the database has no RECORDS
    file. Raises requests.HTTPError on any other failed request.
    """
    # Imported locally so the function does not depend on a module-level
    # import of this name.
    import posixpath

    # Check for a RECORDS file
    if records == 'all':
        # URLs always use forward slashes, whatever the local OS separator is
        r = requests.get(posixpath.join(dburl, 'RECORDS'))
        if r.status_code == 404:
            sys.exit('The database ' + dburl + ' has no WFDB files to download')
        # Do not parse any other HTTP error page as record names
        r.raise_for_status()

        # Get each line as a string
        return r.content.decode('ascii').splitlines()

    # Otherwise the records are input manually.
    # In case they didn't input a list
    if isinstance(records, str):
        return [records]

    return records
146+
def getannotators(dburl, annotators):
    """
    Get the annotation file extensions to download from a physiobank
    database, checked against the database's ANNOTATORS file.

    Input arguments:
    - dburl: the full url of the physiobank database.
    - annotators: None, the string 'all', a single annotator extension
      string, or a list of annotator extension strings.

    Returns:
    - None if annotators is None (nothing is requested from the server).
    - Every annotator listed in the ANNOTATORS file if annotators is 'all'.
    - Otherwise the requested annotators as a list.

    Exits the program if the database has no ANNOTATORS file, or if a
    requested annotator is not listed in it. Raises requests.HTTPError on
    any other failed request.
    """
    # Imported locally so the function does not depend on a module-level
    # import of this name.
    import posixpath

    if annotators is None:
        return annotators

    # Check for an ANNOTATORS file.
    # URLs always use forward slashes, whatever the local OS separator is
    r = requests.get(posixpath.join(dburl, 'ANNOTATORS'))
    if r.status_code == 404:
        sys.exit('The database ' + dburl + ' has no annotation files to download')
    # Do not parse any other HTTP error page as an annotator list
    r.raise_for_status()

    # The first tab-separated field of each line is the annotator extension
    annlist = [line.split('\t')[0] for line in r.content.decode('ascii').splitlines()]

    # Get the annotation file types required
    if annotators == 'all':
        # all possible ones
        return annlist

    # In case they didn't input a list
    if isinstance(annotators, str):
        annotators = [annotators]

    # user input ones. Check validity.
    for a in annotators:
        if a not in annlist:
            # sys.exit accepts a single argument, so build one message
            sys.exit('The database contains no annotators with extension: ' + str(a))

    return annotators
172+
# Make any required local directories
def makelocaldirs(dlbasedir, dlinputs, keepsubdirs):
    """
    Create the local directories needed before downloading files.

    Input arguments:
    - dlbasedir: the local base download directory. Created, with a
      printed notice, if it does not exist yet.
    - dlinputs: a sequence of dlpbfile input tuples. Only item 1 of each
      tuple, the subdirectory relative to dlbasedir, is read here.
    - keepsubdirs: if true, every distinct subdirectory referenced in
      dlinputs is also created under dlbasedir.
    """
    base_exists = os.path.isdir(dlbasedir)
    if not base_exists:
        os.makedirs(dlbasedir)
        print("Created local base download directory: ", dlbasedir)

    # Flat layout: only the base directory is needed
    if not keepsubdirs:
        return

    # Create all required local subdirectories.
    # This must be done outside of dlpbfile to avoid clashes between the
    # download processes.
    targets = {os.path.join(dlbasedir, entry[1]) for entry in dlinputs}
    for target in targets:
        if os.path.isdir(target):
            continue
        os.makedirs(target)
    return
169189
170190
171191# Download a file from physiobank
192+ # The input args are to be unpacked for the use of multiprocessing
172193def dlpbfile (inputs ):
173194
174195 basefile , subdir , pbdb , dlbasedir , keepsubdirs , overwrite = inputs
@@ -187,28 +208,27 @@ def dlpbfile(inputs):
187208 # Figure out where the file should be locally
188209 if keepsubdirs :
189210 dldir = os .path .join (dlbasedir , subdir )
190- # Make the local download subdirectory if it doesn't exist
191- if not os .path .isdir (dldir ):
192- os .makedirs (dldir )
193- print ("Created local download subdirectory: " , dldir )
194211 else :
195212 dldir = dlbasedir
196213
197214 localfile = os .path .join (dldir , basefile )
198215
199- # The file exists. Process accordingly.
216+ # The file exists locally.
200217 if os .path .isfile (localfile ):
201218 # Redownload regardless
202219 if overwrite :
203220 dlfullfile (url , localfile )
221+ # Process accordingly.
204222 else :
205223 localfilesize = os .path .getsize (localfile )
206224 # Local file is smaller than it should be. Append it.
207225 if localfilesize < onlinefilesize :
208226 print ('Detected partially downloaded file: ' + localfile + ' Appending file...' )
209227 headers = {"Range" : "bytes=" + str (localfilesize )+ "-" , 'Accept-Encoding' : '*/*' }
210228 r = requests .get (url , headers = headers , stream = True )
211- with open (localfile , "wb" ) as writefile :
229+ print ('headers: ' , headers )
230+ print ('r content length: ' , len (r .content ))
231+ with open (localfile , "ba" ) as writefile :
212232 writefile .write (r .content )
213233 print ('Done appending.' )
214234 # Local file is larger than it should be. Redownload.
@@ -232,4 +252,5 @@ def dlfullfile(url, localfile):
232252
233253
234254
255+
235256dbindexurl = 'http://physionet.org/physiobank/database/'
0 commit comments