|
23 | 23 | "from fastcore.parallel import *\n", |
24 | 24 | "from functools import wraps\n", |
25 | 25 | "\n", |
26 | | - "import json,urllib\n", |
| 26 | + "import json,urllib,contextlib\n", |
27 | 27 | "import socket,urllib.request,http,urllib\n", |
28 | 28 | "from contextlib import contextmanager,ExitStack\n", |
29 | | - "from urllib.request import Request\n", |
| 29 | + "from urllib.request import Request,urlretrieve,install_opener\n", |
30 | 30 | "from urllib.error import HTTPError,URLError\n", |
31 | 31 | "from urllib.parse import urlencode,urlparse,urlunparse\n", |
32 | 32 | "from http.client import InvalidURL" |
|
238 | 238 | "#export\n", |
# Module-wide opener carrying the default request headers; installing it makes it the
# default opener used by `urllib.request.urlopen` as well.
_opener = urllib.request.build_opener()
_opener.addheaders = [(key, val) for key, val in url_default_headers.items()]
install_opener(_opener)
241 | 242 | "\n", |
242 | 243 | "_httperrors = (\n", |
243 | 244 | " (400,'Bad Request'),(401,'Unauthorized'),(402,'Payment Required'),(403,'Forbidden'),(404,'Not Found'),\n", |
|
367 | 368 | "outputs": [], |
368 | 369 | "source": [ |
369 | 370 | "#export\n", |
370 | | - "def urlsave(url, dest=None):\n", |
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Same as `urllib.request.urlretrieve` but also works with `Request` objects.

    The response for `url` is streamed to disk in 8 KiB blocks.

    Args:
        url: Whatever `urlopen` accepts (a URL string or a `Request`).
        filename: Destination path. When falsy, a named temporary file is
            created with `delete=False` and its path is used instead.
        reporthook: Optional callable invoked as
            `reporthook(blocknum, blocksize, totalsize)` once before the first
            read and once after every block written. `totalsize` is -1 when the
            response carries no Content-Length header.
        data: Optional request body, forwarded to `urlopen`.

    Returns:
        A `(filename, headers)` tuple: the path written to and the response
        headers as returned by `fp.info()`.

    Raises:
        ContentTooShortError: If fewer bytes were read than the Content-Length
            header announced.
    """
    # Imported here so the function does not rely on these names being
    # provided by the module's top-level (possibly star) imports.
    import tempfile
    from urllib.error import ContentTooShortError

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()
        if filename: tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name

        with tfp:
            bs,size,read,blocknum = 1024*8,-1,0,0
            if "content-length" in headers: size = int(headers["Content-Length"])
            # Initial callback reports block 0, before any data has been read.
            if reporthook: reporthook(blocknum, bs, size)
            while True:
                block = fp.read(bs)
                if not block: break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook: reporthook(blocknum, bs, size)

    # Only checkable when the server announced a length (size stays -1 otherwise).
    if size >= 0 and read < size:
        raise ContentTooShortError(f"retrieval incomplete: got only {read} out of {size} bytes", headers)
    return filename,headers
| 395 | + ] |
| 396 | + }, |
| 397 | + { |
| 398 | + "cell_type": "code", |
| 399 | + "execution_count": null, |
| 400 | + "metadata": {}, |
| 401 | + "outputs": [], |
| 402 | + "source": [ |
| 403 | + "#export\n", |
def urlsave(url, dest=None, reporthook=None):
    "Download `url` to `dest` (defaults to the cleaned URL file name; a directory gets that name appended)"
    fname = urlclean(Path(url).name)
    target = Path(fname if dest is None else dest)
    # An existing directory means "save inside it under the URL-derived name".
    if target.is_dir(): target = target/fname
    # Make sure the containing folder exists before anything is written.
    target.parent.mkdir(parents=True, exist_ok=True)
    saved,_ = urlretrieve(url, target, reporthook)
    return saved
379 | 413 | ] |
380 | 414 | }, |
381 | 415 | { |
382 | 416 | "cell_type": "code", |
383 | 417 | "execution_count": null, |
384 | 418 | "metadata": {}, |
385 | | - "outputs": [ |
386 | | - { |
387 | | - "name": "stdout", |
388 | | - "output_type": "stream", |
389 | | - "text": [ |
390 | | - "[Path('/tmp/tmpr3kv5gq_/index.html')]\n" |
391 | | - ] |
392 | | - } |
393 | | - ], |
| 419 | + "outputs": [], |
394 | 420 | "source": [ |
#skip
# Smoke check: download a page into a throwaway directory that is removed afterwards.
with tempfile.TemporaryDirectory() as tmp_dir:
    urlsave('http://www.google.com/index.html', tmp_dir)
|
646 | 672 | "Converted 05_transform.ipynb.\n", |
647 | 673 | "Converted 07_meta.ipynb.\n", |
648 | 674 | "Converted 08_script.ipynb.\n", |
649 | | - "Converted index.ipynb.\n", |
650 | 675 | "Converted parallel_win.ipynb.\n" |
651 | 676 | ] |
652 | 677 | } |
|
0 commit comments