55
66from os import makedirs
77from os import path
8- from urllib import request # urlopen, Request
9- from urllib . error import HTTPError
8+ from requests import Session , HTTPError
9+
1010from zipfile import ZipFile , is_zipfile
1111
1212from .base import ContentProvider
1818class DoiProvider (ContentProvider ):
1919 """Provide contents of a repository identified by a DOI and some helper functions."""
2020
21- def urlopen (self , req , headers = None ):
def __init__(self):
    """Initialise the provider and attach a reusable HTTP session.

    The session is stored on ``self.session`` and its default headers are
    updated with a ``user-agent`` entry built from the repo2docker
    version string.
    """
    super().__init__()
    self.session = http = Session()
    user_agent = "repo2docker {}".format(__version__)
    http.headers.update({"user-agent": user_agent})
29+
30+ def _request (self , url , ** kwargs ):
31+ return self .session .get (url , ** kwargs )
32+
33+ urlopen = _request
34+
35+ def _urlopen (self , req , headers = None ):
2236 """A urlopen() helper"""
2337 # someone passed a string, not a request
2438 if not isinstance (req , request .Request ):
@@ -38,7 +52,8 @@ def doi2url(self, doi):
3852 doi = normalize_doi (doi )
3953
4054 try :
41- resp = self .urlopen ("https://doi.org/{}" .format (doi ))
55+ resp = self ._request ("https://doi.org/{}" .format (doi ))
56+ resp .raise_for_status ()
4257 # If the DOI doesn't resolve, just return URL
4358 except HTTPError :
4459 return doi
@@ -53,38 +68,42 @@ def fetch_file(self, file_ref, host, output_dir, unzip=False):
5368 file_url = deep_get (file_ref , host ["download" ])
5469 fname = deep_get (file_ref , host ["filename" ])
5570 logging .debug ("Downloading file {} as {}\n " .format (file_url , fname ))
56- with self .urlopen (file_url ) as src :
71+
72+ yield "Requesting {}\n " .format (file_url )
73+ resp = self ._request (file_url , stream = True )
74+ resp .raise_for_status ()
75+
76+ if path .dirname (fname ):
77+ sub_dir = path .join (output_dir , path .dirname (fname ))
78+ if not path .exists (sub_dir ):
79+ yield "Creating {}\n " .format (sub_dir )
80+ makedirs (sub_dir , exist_ok = True )
81+
82+ dst_fname = path .join (output_dir , fname )
83+ with open (dst_fname , "wb" ) as dst :
84+ yield "Fetching {}\n " .format (fname )
85+ for chunk in resp .iter_content (chunk_size = None ):
86+ dst .write (chunk )
87+
88+ if unzip and is_zipfile (dst_fname ):
89+ yield "Extracting {}\n " .format (fname )
90+ zfile = ZipFile (dst_fname )
91+ zfile .extractall (path = output_dir )
92+ zfile .close ()
93+
94+ # delete downloaded file ...
95+ os .remove (dst_fname )
96+ # ... and any directories we might have created,
97+ # in which case sub_dir will be defined
5798 if path .dirname (fname ):
58- sub_dir = path .join (output_dir , path .dirname (fname ))
59- if not path .exists (sub_dir ):
60- yield "Creating {}\n " .format (sub_dir )
61- makedirs (sub_dir , exist_ok = True )
62-
63- dst_fname = path .join (output_dir , fname )
64- with open (dst_fname , "wb" ) as dst :
65- yield "Fetching {}\n " .format (fname )
66- shutil .copyfileobj (src , dst )
67- # first close the newly written file, then continue
68- # processing it
69- if unzip and is_zipfile (dst_fname ):
70- yield "Extracting {}\n " .format (fname )
71- zfile = ZipFile (dst_fname )
72- zfile .extractall (path = output_dir )
73- zfile .close ()
74-
75- # delete downloaded file ...
76- os .remove (dst_fname )
77- # ... and any directories we might have created,
78- # in which case sub_dir will be defined
79- if path .dirname (fname ):
80- shutil .rmtree (sub_dir )
81-
82- new_subdirs = os .listdir (output_dir )
83- # if there is only one new subdirectory move its contents
84- # to the top level directory
85- if len (new_subdirs ) == 1 :
86- d = new_subdirs [0 ]
87- copytree (path .join (output_dir , d ), output_dir )
88- shutil .rmtree (path .join (output_dir , d ))
89-
90- yield "Fetched files: {}\n " .format (os .listdir (output_dir ))
99+ shutil .rmtree (sub_dir )
100+
101+ new_subdirs = os .listdir (output_dir )
102+ # if there is only one new subdirectory move its contents
103+ # to the top level directory
104+ if len (new_subdirs ) == 1 :
105+ d = new_subdirs [0 ]
106+ copytree (path .join (output_dir , d ), output_dir )
107+ shutil .rmtree (path .join (output_dir , d ))
108+
109+ yield "Fetched files: {}\n " .format (os .listdir (output_dir ))
0 commit comments