Skip to content

Commit 6026501

Browse files
committed
websearch functional
1 parent 709dab6 commit 6026501

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

koboldcpp.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
modelbusy = threading.Lock()
5959
requestsinqueue = 0
6060
defaultport = 5001
61-
KcppVersion = "1.80.3"
61+
KcppVersion = "1.81"
6262
showdebug = True
6363
guimode = False
6464
showsamplerwarning = True
@@ -1310,6 +1310,11 @@ def fetch_webpages_parallel(urls):
13101310
results = list(executor.map(fetch_searched_webpage, urls))
13111311
return results
13121312

1313+
def normalize_page_text(text):
    """Tidy the spacing around sentence punctuation in a block of text.

    Two passes are applied, in order:

    1. Any run of whitespace directly before ``.``, ``,``, ``!`` or ``?``
       is removed.
    2. A single space is inserted after one of those marks when it is
       immediately followed by a non-whitespace character.

    Pass 2 deliberately leaves two cases alone so that ordinary text is not
    mangled:

    * a ``.`` or ``,`` sitting between two digits (``3.14``, ``1,000``);
    * a mark that is followed by another mark (``...``, ``?!``) -- only the
      last mark of such a run gets the trailing space.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string. Punctuation at the very end of the string is
        left without a trailing space.
    """
    # Pass 1: remove whitespace that precedes a punctuation mark.
    text = re.sub(r'\s+([.,!?])', r'\1', text)
    # Pass 2: add the missing space after punctuation. The three alternatives
    # are: "!"/"?" before a normal character; "."/"," not preceded by a
    # digit; "."/"," preceded by a digit but not followed by one. The
    # lookaheads never consume the following character, so adjacent marks are
    # each examined independently.
    text = re.sub(
        r'[!?](?=[^\s.,!?])'
        r'|(?<![0-9])[.,](?=[^\s.,!?])'
        r'|[.,](?=[^\s.,!?0-9])',
        r'\g<0> ',
        text,
    )
    return text
1317+
13131318
class VisibleTextParser(HTMLParser):
13141319
def __init__(self):
13151320
super().__init__()
@@ -1393,12 +1398,14 @@ def handle_data(self, data):
13931398
parser2 = VisibleTextParser()
13941399
parser2.feed(html_content)
13951400
scraped = parser2.get_text().strip()
1401+
scraped = normalize_page_text(scraped)
1402+
desc = normalize_page_text(desc)
13961403
s = difflib.SequenceMatcher(None, scraped.lower(), desc.lower(), autojunk=False)
13971404
matches = s.find_longest_match(0, len(scraped), 0, desclen)
13981405
if matches.size > 100 and desclen-matches.size < 100: #good enough match
13991406
# expand description by some chars both sides
1400-
expandamtbefore = 250
1401-
expandamtafter = 750
1407+
expandamtbefore = 200
1408+
expandamtafter = 800
14021409
startpt = matches.a - expandamtbefore
14031410
startpt = 0 if startpt < 0 else startpt
14041411
endpt = matches.a + expandamtafter + desclen

0 commit comments

Comments
 (0)