Skip to content

Commit eadc5bf

Browse files
committed
fixes #335
1 parent 3e2e946 commit eadc5bf

File tree

3 files changed

+77
-24
lines changed

3 files changed

+77
-24
lines changed

fastcore/_nbdev.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@
200200
"urljson": "03b_net.ipynb",
201201
"urlcheck": "03b_net.ipynb",
202202
"urlclean": "03b_net.ipynb",
203+
"urlretrieve": "03b_net.ipynb",
203204
"urlsave": "03b_net.ipynb",
204205
"urlvalid": "03b_net.ipynb",
205206
"urlrequest": "03b_net.ipynb",

fastcore/net.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
'HTTP422UnprocessableEntityError', 'HTTP423LockedError', 'HTTP424FailedDependencyError',
1111
'HTTP425TooEarlyError', 'HTTP426UpgradeRequiredError', 'HTTP428PreconditionRequiredError',
1212
'HTTP429TooManyRequestsError', 'HTTP431HeaderFieldsTooLargeError', 'HTTP451LegalReasonsError', 'urlopen',
13-
'urlread', 'urljson', 'urlcheck', 'urlclean', 'urlsave', 'urlvalid', 'urlrequest', 'urlsend', 'do_request',
14-
'start_server', 'start_client']
13+
'urlread', 'urljson', 'urlcheck', 'urlclean', 'urlretrieve', 'urlsave', 'urlvalid', 'urlrequest', 'urlsend',
14+
'do_request', 'start_server', 'start_client']
1515

1616
# Cell
1717
from .imports import *
@@ -21,10 +21,10 @@
2121
from .parallel import *
2222
from functools import wraps
2323

24-
import json,urllib
24+
import json,urllib,contextlib
2525
import socket,urllib.request,http,urllib
2626
from contextlib import contextmanager,ExitStack
27-
from urllib.request import Request
27+
from urllib.request import Request,urlretrieve,install_opener
2828
from urllib.error import HTTPError,URLError
2929
from urllib.parse import urlencode,urlparse,urlunparse
3030
from http.client import InvalidURL
@@ -75,6 +75,7 @@ class HTTP5xxServerError(HTTPError):
7575
# Cell
7676
_opener = urllib.request.build_opener()
7777
_opener.addheaders = list(url_default_headers.items())
78+
install_opener(_opener)
7879

7980
_httperrors = (
8081
(400,'Bad Request'),(401,'Unauthorized'),(402,'Payment Required'),(403,'Forbidden'),(404,'Not Found'),
@@ -137,15 +138,41 @@ def urlclean(url):
137138
return urlunparse(urlparse(str(url))[:3]+('','',''))
138139

139140
# Cell
140-
def urlsave(url, dest=None):
141+
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Same as `urllib.request.urlretrieve` but also works with `Request` objects

    `url` and `data` are passed straight to `urlopen`. The response body is
    streamed in 8 KiB blocks to `filename`; when `filename` is falsy a
    non-deleted temporary file is created and its path is used instead.

    `reporthook`, if given, is called as `reporthook(blocknum, blocksize, totalsize)`
    once before the first read and once after every block written. `totalsize`
    is -1 when the response carries no Content-Length header.

    Returns `(filename, headers)`. Raises `ContentTooShortError` when fewer
    bytes were received than Content-Length announced.
    """
    # Imported here so the function does not rely on a star import to supply them:
    # the module-level `urllib.error` import only pulls in HTTPError and URLError.
    import tempfile
    from urllib.error import ContentTooShortError

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()
        if filename: tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name

        with tfp:
            bs,size,read,blocknum = 1024*8,-1,0,0
            if "content-length" in headers: size = int(headers["Content-Length"])
            if reporthook: reporthook(blocknum, bs, size)
            while True:
                block = fp.read(bs)
                if not block: break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook: reporthook(blocknum, bs, size)

    result = filename,headers
    if size >= 0 and read < size:
        # Like the stdlib version, attach what was downloaded so far as the exception's `content`
        raise ContentTooShortError(f"retrieval incomplete: got only {read} out of {size} bytes", result)
    return result
165+
166+
# Cell
167+
def urlsave(url, dest=None, reporthook=None):
    "Download `url` to `dest`, naming the file after the URL when `dest` is omitted or is a directory"
    fname = urlclean(Path(url).name)
    target = Path(fname if dest is None else dest)
    if target.is_dir(): target = target/fname
    # Create any missing parent folders before the download starts writing
    target.parent.mkdir(parents=True, exist_ok=True)
    saved,_ = urlretrieve(url, target, reporthook)
    return saved
149176

150177
# Cell
151178
def urlvalid(x):

nbs/03b_net.ipynb

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
"from fastcore.parallel import *\n",
2424
"from functools import wraps\n",
2525
"\n",
26-
"import json,urllib\n",
26+
"import json,urllib,contextlib\n",
2727
"import socket,urllib.request,http,urllib\n",
2828
"from contextlib import contextmanager,ExitStack\n",
29-
"from urllib.request import Request\n",
29+
"from urllib.request import Request,urlretrieve,install_opener\n",
3030
"from urllib.error import HTTPError,URLError\n",
3131
"from urllib.parse import urlencode,urlparse,urlunparse\n",
3232
"from http.client import InvalidURL"
@@ -238,6 +238,7 @@
238238
"#export\n",
239239
"_opener = urllib.request.build_opener()\n",
240240
"_opener.addheaders = list(url_default_headers.items())\n",
241+
"install_opener(_opener)\n",
241242
"\n",
242243
"_httperrors = (\n",
243244
" (400,'Bad Request'),(401,'Unauthorized'),(402,'Payment Required'),(403,'Forbidden'),(404,'Not Found'),\n",
@@ -367,30 +368,55 @@
367368
"outputs": [],
368369
"source": [
369370
"#export\n",
370-
"def urlsave(url, dest=None):\n",
371+
"def urlretrieve(url, filename=None, reporthook=None, data=None):\n",
372+
" \"Same as `urllib.request.urlretrieve` but also works with `Request` objects\"\n",
373+
" with contextlib.closing(urlopen(url, data)) as fp:\n",
374+
" headers = fp.info()\n",
375+
" if filename: tfp = open(filename, 'wb')\n",
376+
" else:\n",
377+
" tfp = tempfile.NamedTemporaryFile(delete=False)\n",
378+
" filename = tfp.name\n",
379+
"\n",
380+
" with tfp:\n",
381+
" bs,size,read,blocknum = 1024*8,-1,0,0\n",
382+
" if \"content-length\" in headers: size = int(headers[\"Content-Length\"])\n",
383+
" if reporthook: reporthook(blocknum, bs, size)\n",
384+
" while True:\n",
385+
" block = fp.read(bs)\n",
386+
" if not block: break\n",
387+
" read += len(block)\n",
388+
" tfp.write(block)\n",
389+
" blocknum += 1\n",
390+
" if reporthook: reporthook(blocknum, bs, size)\n",
391+
"\n",
392+
" if size >= 0 and read < size:\n",
393+
" raise ContentTooShortError(f\"retrieval incomplete: got only {read} out of {size} bytes\", headers)\n",
394+
" return filename,headers"
395+
]
396+
},
397+
{
398+
"cell_type": "code",
399+
"execution_count": null,
400+
"metadata": {},
401+
"outputs": [],
402+
"source": [
403+
"#export\n",
404+
"def urlsave(url, dest=None, reporthook=None):\n",
371405
" \"Retrieve `url` and save based on its name\"\n",
372-
" res = urlread(urlwrap(url), decode=False)\n",
373406
" name = urlclean(Path(url).name)\n",
374407
" if dest is None: dest = name\n",
375408
" dest = Path(dest)\n",
376409
" if dest.is_dir(): dest = dest/name\n",
377-
" Path(dest).write_bytes(res)\n",
378-
" return dest"
410+
" dest.parent.mkdir(parents=True, exist_ok=True)\n",
411+
" nm,msg = urlretrieve(url, dest, reporthook)\n",
412+
" return nm"
379413
]
380414
},
381415
{
382416
"cell_type": "code",
383417
"execution_count": null,
384418
"metadata": {},
385-
"outputs": [
386-
{
387-
"name": "stdout",
388-
"output_type": "stream",
389-
"text": [
390-
"[Path('/tmp/tmpr3kv5gq_/index.html')]\n"
391-
]
392-
}
393-
],
419+
"outputs": [],
394420
"source": [
395421
"#skip\n",
396422
"with tempfile.TemporaryDirectory() as d: urlsave('http://www.google.com/index.html', d)"
@@ -646,7 +672,6 @@
646672
"Converted 05_transform.ipynb.\n",
647673
"Converted 07_meta.ipynb.\n",
648674
"Converted 08_script.ipynb.\n",
649-
"Converted index.ipynb.\n",
650675
"Converted parallel_win.ipynb.\n"
651676
]
652677
}

0 commit comments

Comments
 (0)