Skip to content

Commit 76f2a17

Browse files
committed
fixes #168
1 parent d058b6c commit 76f2a17

File tree

3 files changed

+69
-6
lines changed

3 files changed

+69
-6
lines changed

fastcore/_nbdev.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@
141141
"join_path_file": "03_xtras.ipynb",
142142
"urlread": "03_xtras.ipynb",
143143
"urljson": "03_xtras.ipynb",
144+
"urlwrap": "03_xtras.ipynb",
145+
"urlcheck": "03_xtras.ipynb",
144146
"run": "03_xtras.ipynb",
145147
"do_request": "03_xtras.ipynb",
146148
"sort_by_run": "03_xtras.ipynb",

fastcore/xtras.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
__all__ = ['dict2obj', 'tuplify', 'uniqueify', 'is_listy', 'shufflish', 'mapped', 'IterLen', 'ReindexCollection',
44
'open_file', 'save_pickle', 'load_pickle', 'maybe_open', 'image_size', 'bunzip', 'join_path_file', 'urlread',
5-
'urljson', 'run', 'do_request', 'sort_by_run', 'trace', 'round_multiple', 'modified_env', 'ContextManagers',
6-
'str2bool', 'set_num_threads', 'ProcessPoolExecutor', 'ThreadPoolExecutor', 'parallel', 'run_procs',
7-
'parallel_gen', 'threaded']
5+
'urljson', 'urlwrap', 'urlcheck', 'run', 'do_request', 'sort_by_run', 'trace', 'round_multiple',
6+
'modified_env', 'ContextManagers', 'str2bool', 'set_num_threads', 'ProcessPoolExecutor',
7+
'ThreadPoolExecutor', 'parallel', 'run_procs', 'parallel_gen', 'threaded']
88

99
# Cell
1010
from .imports import *
@@ -16,8 +16,9 @@
1616
from contextlib import contextmanager,ExitStack
1717
from pdb import set_trace
1818
from urllib.request import Request,urlopen
19-
from urllib.error import HTTPError
19+
from urllib.error import HTTPError,URLError
2020
from urllib.parse import urlencode
21+
from http.client import InvalidURL
2122
from threading import Thread
2223

2324
# Cell
@@ -186,6 +187,25 @@ def urljson(url, data=None):
186187
"Retrieve `url` and decode json"
187188
return json.loads(urlread(url, data=data))
188189

190+
# Cell
191+
# Desktop Chrome user-agent string; `urlwrap` sets it as the `User-Agent` header on each request.
_ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36'
192+
193+
# Cell
194+
def urlwrap(url):
    "Return `url` as a urllib `Request` whose `User-Agent` header is set to `_ua`"
    # An existing `Request` is reused (and modified in place); anything else is wrapped in a new one.
    req = url if isinstance(url, Request) else Request(url)
    req.headers['User-Agent'] = _ua
    return req
199+
200+
# Cell
201+
def urlcheck(url, timeout=10):
    "Return `True` if `url` is falsy or opens with a status below 400 within `timeout`; `False` on a URL error, socket timeout or invalid URL"
    # Imported locally so the `socket.timeout` lookup below cannot raise a
    # NameError if the module-level imports do not provide `socket`.
    import socket
    # An empty or `None` url has nothing to check, so it is reported as OK.
    if not url: return True
    try:
        with urlopen(urlwrap(url), timeout=timeout) as u: return u.status<400
    # All three failure modes give the same answer, so they share one handler.
    except (URLError, socket.timeout, InvalidURL): return False
208+
189209
# Cell
190210
def run(cmd, *rest, ignore_ex=False, as_bytes=False):
191211
"Pass `cmd` (splitting with `shlex` if string) to `subprocess.run`; return `stdout`; raise `IOError` if fails"

nbs/03_xtras.ipynb

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@
2525
"from contextlib import contextmanager,ExitStack\n",
2626
"from pdb import set_trace\n",
2727
"from urllib.request import Request,urlopen\n",
28-
"from urllib.error import HTTPError\n",
28+
"from urllib.error import HTTPError,URLError\n",
2929
"from urllib.parse import urlencode\n",
30+
"from http.client import InvalidURL\n",
3031
"from threading import Thread"
3132
]
3233
},
@@ -539,7 +540,7 @@
539540
{
540541
"data": {
541542
"text/plain": [
542-
"['h', 'f', 'd', 'a', 'e', 'b', 'g', 'c']"
543+
"['h', 'g', 'd', 'a', 'b', 'e', 'f', 'c']"
543544
]
544545
},
545546
"execution_count": null,
@@ -1128,6 +1129,46 @@
11281129
" return json.loads(urlread(url, data=data))"
11291130
]
11301131
},
1132+
{
1133+
"cell_type": "code",
1134+
"execution_count": null,
1135+
"metadata": {},
1136+
"outputs": [],
1137+
"source": [
1138+
"#export\n",
1139+
"_ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36'"
1140+
]
1141+
},
1142+
{
1143+
"cell_type": "code",
1144+
"execution_count": null,
1145+
"metadata": {},
1146+
"outputs": [],
1147+
"source": [
1148+
"#export\n",
1149+
"def urlwrap(url):\n",
1150+
" \"Wrap `url` in a urllib `Request` with a user-agent header\"\n",
1151+
" if not isinstance(url,Request): url = Request(url)\n",
1152+
" url.headers['User-Agent'] = _ua\n",
1153+
" return url"
1154+
]
1155+
},
1156+
{
1157+
"cell_type": "code",
1158+
"execution_count": null,
1159+
"metadata": {},
1160+
"outputs": [],
1161+
"source": [
1162+
"#export\n",
1163+
"def urlcheck(url, timeout=10):\n",
1164+
" if not url: return True\n",
1165+
" try:\n",
1166+
" with urlopen(urlwrap(url), timeout=timeout) as u: return u.status<400\n",
1167+
" except URLError: return False\n",
1168+
" except socket.timeout: return False\n",
1169+
" except InvalidURL: return False"
1170+
]
1171+
},
11311172
{
11321173
"cell_type": "code",
11331174
"execution_count": null,

0 commit comments

Comments
 (0)