Skip to content

Commit ecb2cbf

Browse files
committed
Fix URL parsing in web search: percent-encode the URL path before fetching
1 parent fd0eecd commit ecb2cbf

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

koboldcpp.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1969,6 +1969,7 @@ def websearch(query):
19691969
utfprint("Performing new websearch...",1)
19701970

19711971
def fetch_searched_webpage(url, random_agent=False):
1972+
from urllib.parse import quote, urlsplit, urlunsplit
19721973
uagent = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
19731974
if random_agent:
19741975
agents = ["Mozilla/5.0 (Macintosh; Intel Mac OS X 13_2) Gecko/20100101 Firefox/114.0",
@@ -1979,17 +1980,23 @@ def fetch_searched_webpage(url, random_agent=False):
19791980
uagent = random.choice(agents)
19801981
if args.debugmode:
19811982
utfprint(f"WebSearch URL: {url}")
1983+
# Encode non-ASCII parts of the URL
19821984
try:
1985+
split_url = urlsplit(url)
1986+
encoded_path = quote(split_url.path)
1987+
encoded_url = urlunsplit((split_url.scheme, split_url.netloc, encoded_path, split_url.query, split_url.fragment))
1988+
19831989
ssl_cert_dir = os.environ.get('SSL_CERT_DIR')
19841990
if not ssl_cert_dir and not nocertify and os.name != 'nt':
19851991
os.environ['SSL_CERT_DIR'] = '/etc/ssl/certs'
1986-
req = urllib.request.Request(url, headers={'User-Agent': uagent})
1992+
1993+
req = urllib.request.Request(encoded_url, headers={'User-Agent': uagent})
19871994
with urllib.request.urlopen(req, timeout=15) as response:
19881995
html_content = response.read().decode('utf-8', errors='ignore')
19891996
return html_content
19901997
except urllib.error.HTTPError: #we got blocked? try 1 more time with a different user agent
19911998
try:
1992-
req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'})
1999+
req = urllib.request.Request(encoded_url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'})
19932000
with urllib.request.urlopen(req, timeout=15) as response:
19942001
html_content = response.read().decode('utf-8', errors='ignore')
19952002
return html_content

0 commit comments

Comments
 (0)