Skip to content

Commit 4fe2b3e

Browse files
committed
Play with websearch.
1 parent ce5aed2 commit 4fe2b3e

File tree

2 files changed

+13
-6
lines changed

2 files changed

+13
-6
lines changed

klite.embd

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4252,8 +4252,6 @@ pre code,td,th{padding:0}pre code,table{background-color:transparent}.table,inpu
42524252
const ALLTALK_ID = 1001;
42534253
const OAI_TTS_ID = 1002;
42544254

4255-
const BIG_HD_RES_PX = 512; //when saving oversized images, could be 640?
4256-
42574255
const HD_RES_PX = 768;
42584256
const NO_HD_RES_PX = 512;
42594257
const SAVE_SLOTS = 10;

koboldcpp.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1816,7 +1816,7 @@ def websearch(query):
18161816
global websearch_lastresponse
18171817
if not query or query=="":
18181818
return []
1819-
query = query[:300] # only search first 300 chars, due to search engine limits
1819+
query = query[:499] # only search first 499 chars, due to search engine limits
18201820
if query==websearch_lastquery:
18211821
print("Returning cached websearch...")
18221822
return websearch_lastresponse
@@ -1825,24 +1825,28 @@ def websearch(query):
18251825
import difflib
18261826
from html.parser import HTMLParser
18271827
from concurrent.futures import ThreadPoolExecutor
1828-
num_results = 3
1828+
num_results = 10
18291829
searchresults = []
18301830
if args.debugmode != -1 and not args.quiet:
18311831
print("Performing new websearch...")
18321832

18331833
def fetch_searched_webpage(url):
1834-
if args.debugmode:
1834+
if args.debugmode != -1 and not args.quiet:
18351835
utfprint(f"WebSearch URL: {url}")
18361836
try:
18371837
req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'})
18381838
with urllib.request.urlopen(req, timeout=15) as response:
18391839
html_content = response.read().decode('utf-8', errors='ignore')
1840+
# if args.debugmode != -1 and not args.quiet:
1841+
# print(f"Returning results with Googlebot compatible agent: {html_content}")
18401842
return html_content
18411843
except urllib.error.HTTPError: #we got blocked? try 1 more time with a different user agent
18421844
try:
18431845
req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'})
18441846
with urllib.request.urlopen(req, timeout=15) as response:
18451847
html_content = response.read().decode('utf-8', errors='ignore')
1848+
# if args.debugmode != -1 and not args.quiet:
1849+
# print(f"Returning results with AppleWebKit/KHTML/Gecko compatible agent: {html_content}")
18461850
return html_content
18471851
except Exception as e:
18481852
if args.debugmode != -1 and not args.quiet:
@@ -1856,11 +1860,16 @@ def fetch_webpages_parallel(urls):
18561860
with ThreadPoolExecutor() as executor:
18571861
# Submit tasks and gather results
18581862
results = list(executor.map(fetch_searched_webpage, urls))
1863+
if args.debugmode != -1 and not args.quiet:
1864+
print(f"Returning results: {urls}")
1865+
# print(f"Returning results: {results}")
18591866
return results
18601867

18611868
def normalize_page_text(text):
18621869
text = re.sub(r'\s+([.,!?])', r'\1', text) # Remove spaces before punctuation
18631870
# text = re.sub(r'([.,!?])([^\s])', r'\1 \2', text) # Ensure a single space follows punctuation, if not at the end of a line
1871+
# if args.debugmode != -1 and not args.quiet:
1872+
# print(f"Returning text: {text}")
18641873
return text
18651874

18661875
class VisibleTextParser(HTMLParser):
@@ -1953,7 +1962,7 @@ def handle_data(self, data):
19531962
if matches.size > 100 and desclen-matches.size < 100: #good enough match
19541963
# expand description by some chars both sides
19551964
expandamtbefore = 200
1956-
expandamtafter = 800
1965+
expandamtafter = 3500
19571966
startpt = matches.a - expandamtbefore
19581967
startpt = 0 if startpt < 0 else startpt
19591968
endpt = matches.a + expandamtafter + desclen

0 commit comments

Comments
 (0)