@@ -1969,6 +1969,7 @@ def websearch(query):
19691969 utfprint ("Performing new websearch..." ,1 )
19701970
19711971 def fetch_searched_webpage (url , random_agent = False ):
1972+ from urllib .parse import quote , urlsplit , urlunsplit
19721973 uagent = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
19731974 if random_agent :
19741975 agents = ["Mozilla/5.0 (Macintosh; Intel Mac OS X 13_2) Gecko/20100101 Firefox/114.0" ,
@@ -1979,17 +1980,23 @@ def fetch_searched_webpage(url, random_agent=False):
19791980 uagent = random .choice (agents )
19801981 if args .debugmode :
19811982 utfprint (f"WebSearch URL: { url } " )
1983+ # Encode non-ASCII parts of the URL
19821984 try :
1985+ split_url = urlsplit (url )
1986+ encoded_path = quote (split_url .path )
1987+ encoded_url = urlunsplit ((split_url .scheme , split_url .netloc , encoded_path , split_url .query , split_url .fragment ))
1988+
19831989 ssl_cert_dir = os .environ .get ('SSL_CERT_DIR' )
19841990 if not ssl_cert_dir and not nocertify and os .name != 'nt' :
19851991 os .environ ['SSL_CERT_DIR' ] = '/etc/ssl/certs'
1986- req = urllib .request .Request (url , headers = {'User-Agent' : uagent })
1992+
1993+ req = urllib .request .Request (encoded_url , headers = {'User-Agent' : uagent })
19871994 with urllib .request .urlopen (req , timeout = 15 ) as response :
19881995 html_content = response .read ().decode ('utf-8' , errors = 'ignore' )
19891996 return html_content
19901997 except urllib .error .HTTPError : #we got blocked? try 1 more time with a different user agent
19911998 try :
1992- req = urllib .request .Request (url , headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36' })
1999+ req = urllib .request .Request (encoded_url , headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36' })
19932000 with urllib .request .urlopen (req , timeout = 15 ) as response :
19942001 html_content = response .read ().decode ('utf-8' , errors = 'ignore' )
19952002 return html_content
0 commit comments