 
 import aiofiles
 import aiohttp
-from aiohttp import ClientSession, ClientTimeout
+from aiohttp import ClientSession
 from aiohttp.helpers import sentinel
 
-
 logging.basicConfig(
     format="%(asctime)s %(levelname)s:%(name)s: %(message)s",
     level=logging.DEBUG,
@@ -28,26 +27,16 @@
 
 HREF_RE = re.compile(r'href="(.*?)"')
 
-# You can specify timeouts for both the session as a whole and
-# for individual requests.
-#
-# https://aiohttp.readthedocs.io/en/stable/client_quickstart.html#timeouts
-DEFAULT_GET_TIMEOUT = ClientTimeout(total=8)  # seconds
-
 
-async def fetch_html(
-    url: str, session: ClientSession, timeout: ClientTimeout, **kwargs
-) -> str:
+async def fetch_html(url: str, session: ClientSession, **kwargs) -> str:
     """GET request wrapper to fetch page HTML.
 
     kwargs are passed to `session.request()`.
     """
 
     # Don't do any try/except here. If either the request or reading
     # of bytes raises, let that be handled by caller.
-    resp = await session.request(
-        method="GET", url=url, timeout=timeout, **kwargs
-    )
+    resp = await session.request(method="GET", url=url, **kwargs)
     resp.raise_for_status()  # raise if status >= 400
     logger.info("Got response [%s] for URL: %s", resp.status, url)
     html = await resp.text()  # For bytes: resp.read()
@@ -56,18 +45,11 @@ async def fetch_html(
     return html
 
 
-async def parse(
-    url: str,
-    session: ClientSession,
-    timeout: ClientTimeout = DEFAULT_GET_TIMEOUT,
-    **kwargs,
-) -> set:
+async def parse(url: str, session: ClientSession, **kwargs) -> set:
     """Find HREFs in the HTML of `url`."""
     found = set()
     try:
-        html = await fetch_html(
-            url=url, session=session, timeout=timeout, **kwargs
-        )
+        html = await fetch_html(url=url, session=session, **kwargs)
     except (
         aiohttp.ClientError,
         aiohttp.http_exceptions.HttpProcessingError,
@@ -113,12 +95,7 @@ async def write_one(file: BinaryIO, url: str, **kwargs) -> None:
         logger.info("Wrote results for source URL: %s", url)
 
 
-async def bulk_crawl_and_write(
-    file: BinaryIO,
-    urls: set,
-    timeout: Union[object, ClientTimeout] = sentinel,
-    **kwargs,
-) -> None:
+async def bulk_crawl_and_write(file: BinaryIO, urls: set, **kwargs) -> None:
    """Crawl & write concurrently to `file` for multiple `urls`."""
     async with ClientSession() as session:
         tasks = []
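
Even with the explicit timeout parameters removed, callers can still bound a request: whatever is passed through `**kwargs` ends up in `session.request()`, which accepts a per-request `ClientTimeout`, and a session-wide default can be set via `ClientSession(timeout=...)`. A minimal sketch of the per-request route, assuming the `parse()` coroutine from the diff above is in scope (the URL and the 8-second budget are placeholders, not part of this commit):

import asyncio

from aiohttp import ClientSession, ClientTimeout


async def main() -> None:
    # `parse` is the coroutine changed in this commit; how it is imported
    # depends on the project layout, so it is assumed to be in scope here.
    async with ClientSession() as session:
        # The timeout kwarg rides along in **kwargs through parse() ->
        # fetch_html() -> session.request(), which takes a ClientTimeout.
        links = await parse(
            "https://www.example.com",  # placeholder URL
            session,
            timeout=ClientTimeout(total=8),  # placeholder per-request budget
        )
        print(links)


asyncio.run(main())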