File tree Expand file tree Collapse file tree 1 file changed +24
-5
lines changed
Expand file tree Collapse file tree 1 file changed +24
-5
lines changed Original file line number Diff line number Diff line change 2727
2828from pyfaup .faup import Faup
2929
30+
31+ import signal
32+
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when an alarm armed with signal.alarm() expires."""
35+
def timeout_handler(signum, frame):
    """Signal handler that interrupts the running code with TimeoutException.

    :param signum: number of the delivered signal (unused)
    :param frame: stack frame that was executing when the signal arrived (unused)
    :raises TimeoutException: always
    """
    raise TimeoutException()
38+
39+
# Route SIGALRM to timeout_handler so that an expired signal.alarm() surfaces
# as a TimeoutException inside whatever code is running at that moment.
signal.signal(signal.SIGALRM, timeout_handler)
41+
42+
3043# interact with splash_crawler API
3144import requests
3245requests .packages .urllib3 .disable_warnings (requests .packages .urllib3 .exceptions .InsecureRequestWarning )
@@ -310,12 +323,18 @@ def extract_favicon_from_html(html, url):
310323# # # # # # # #
311324
def extract_title_from_html(html):
    """Return the text of the HTML ``<title>`` element, or ``''`` if there is none.

    Parsing is guarded by a 60-second SIGALRM alarm: if the alarm fires while
    the document is being parsed, the TimeoutException raised by the signal
    handler is swallowed and an empty string is returned.

    :param html: HTML document to parse
    :return: the title as a str, or '' when the title is missing, empty,
             or parsing timed out
    """
    signal.alarm(60)
    try:
        soup = BeautifulSoup(html, 'html.parser')
        title = soup.title
        if title:
            title = title.string
            if title:
                return str(title)
    except TimeoutException:
        pass
    finally:
        # Disarm the alarm on every exit path. A `try ... else` clause is
        # skipped when the try body returns or raises another exception, which
        # would leave the alarm pending and let it fire later in unrelated code.
        signal.alarm(0)
    return ''
320339
321340def extract_description_from_html (html ):
You can’t perform that action at this time.
0 commit comments