Skip to content

Commit baaecd1

Browse files
committed
added a basic websearch proxy
1 parent 7c671f2 commit baaecd1

File tree

1 file changed

+84
-2
lines changed

1 file changed

+84
-2
lines changed

koboldcpp.py

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
import asyncio
2424
import socket
2525
import threading
26+
import html
27+
import urllib.parse as urlparse
2628
from concurrent.futures import ThreadPoolExecutor
2729
from datetime import datetime, timezone
2830

@@ -1268,6 +1270,71 @@ def detokenize_ids(tokids):
12681270
detokstr = ctypes.string_at(detok).decode("UTF-8","ignore")
12691271
return detokstr
12701272

1273+
# Performs a web search using DuckDuckGo and extracts text content from the top results.
1274+
def websearch(query):
    """Search DuckDuckGo's HTML endpoint and return text for the top results.

    The results page is fetched from ``https://html.duckduckgo.com/html/`` and
    scanned for ``<a>`` tags whose ``class`` contains ``result__a``,
    ``result__url`` or ``result__snippet`` (presumably the title, displayed
    URL and snippet of each hit - verify against the live markup). The text of
    those anchors is joined with newlines into one string per result.

    Args:
        query: The search string. A falsy value (``None`` or ``""``) performs
            no request.

    Returns:
        A list of at most 3 strings, one per search result. An empty list is
        returned for an empty query and whenever fetching or parsing fails,
        so callers always receive a list.
    """
    if not query:
        return []
    import urllib.parse
    import urllib.request
    from html.parser import HTMLParser

    num_results = 3
    # Bound the request so a stalled connection cannot block the caller forever.
    timeout_secs = 10

    class ExtractResultsParser(HTMLParser):
        """Collects the text of result anchors into one entry per result."""

        def __init__(self):
            super().__init__()
            self.results = []
            self.recordingTitle = False
            self.recordingDesc = False
            self.currentrytxt = ""    # text accumulated for the result being built
            self.currsegmenttxt = ""  # text of the anchor currently being read

        def handle_starttag(self, tag, attrs):
            if tag != "a":
                return
            # Gather every class token on the tag. A valueless attribute is
            # reported with a value of None, which must not be split.
            classes = set()
            for attr_name, attr_value in attrs:
                if attr_name == "class" and attr_value:
                    classes.update(attr_value.split())

            if not self.recordingTitle:
                if "result__a" in classes:
                    # A title anchor starts a fresh entry.
                    self.recordingTitle = True
                    self.currentrytxt = ""
                    self.currsegmenttxt = ""
                elif "result__url" in classes:
                    # The URL anchor is appended to the entry like a title line.
                    self.recordingTitle = True
                    self.currsegmenttxt = ""
            if not self.recordingDesc and "result__snippet" in classes:
                self.recordingDesc = True
                self.currsegmenttxt = ""

        def handle_endtag(self, tag):
            if tag != "a":
                return
            if self.recordingTitle:
                self.recordingTitle = False
                self.currentrytxt += self.currsegmenttxt.strip() + "\n"
                self.currsegmenttxt = ""
            if self.recordingDesc:
                # The snippet anchor closes the entry; store it if non-empty.
                self.recordingDesc = False
                self.currentrytxt += self.currsegmenttxt.strip()
                self.currsegmenttxt = ""
                if self.currentrytxt != "":
                    self.results.append(self.currentrytxt.strip())
                self.currentrytxt = ""

        def handle_data(self, data):
            if self.recordingTitle or self.recordingDesc:
                self.currsegmenttxt += data

    encoded_query = urllib.parse.quote(query)
    search_url = f"https://html.duckduckgo.com/html/?q={encoded_query}"

    try:
        req = urllib.request.Request(search_url, headers={'User-Agent': 'Mozilla/5.0'})
        with urllib.request.urlopen(req, timeout=timeout_secs) as response:
            search_html = response.read().decode('utf-8', errors='ignore')
        parser = ExtractResultsParser()
        parser.feed(search_html)
        return parser.results[:num_results]
    except Exception as e:
        # Best-effort lookup: report the failure and return an empty list
        # rather than a different type, so the return value is always a list.
        print(f"Error fetching URL {search_url}: {e}")
        return []
1337+
12711338
#################################################################
12721339
### A hacky simple HTTP server simulating a kobold api by Concedo
12731340
### we are intentionally NOT using flask, because we want MINIMAL dependencies
@@ -1797,8 +1864,6 @@ def secure_endpoint(self): #returns false if auth fails. caller should exit
17971864

17981865
def noscript_webui(self):
17991866
global modelbusy, sslvalid
1800-
import html
1801-
import urllib.parse as urlparse
18021867
parsed_url = urlparse.urlparse(self.path)
18031868
parsed_dict = urlparse.parse_qs(parsed_url.query)
18041869
reply = ""
@@ -2022,6 +2087,18 @@ def do_GET(self):
20222087
},
20232088
}).encode())
20242089

2090+
elif self.path.startswith(("/websearch")):
2091+
if args.websearch:
2092+
parsed_url = urlparse.urlparse(self.path)
2093+
parsed_dict = urlparse.parse_qs(parsed_url.query)
2094+
searchstr = (parsed_dict['q'][0]) if 'q' in parsed_dict else ""
2095+
if args.debugmode:
2096+
print(f"Searching web for: {searchstr}")
2097+
searchres = websearch(searchstr)
2098+
response_body = (json.dumps(searchres).encode())
2099+
else:
2100+
response_body = (json.dumps([]).encode())
2101+
20252102
elif self.path=="/api" or self.path=="/docs" or self.path.startswith(('/api/?json=','/api?json=','/docs/?json=','/docs?json=')):
20262103
content_type = 'text/html'
20272104
if embedded_kcpp_docs is None:
@@ -2765,6 +2842,7 @@ def hide_tooltip(event):
27652842
host_var = ctk.StringVar(value="")
27662843
multiuser_var = ctk.IntVar(value=1)
27672844
multiplayer_var = ctk.IntVar(value=has_multiplayer)
2845+
websearch_var = ctk.IntVar(value=0)
27682846
horde_name_var = ctk.StringVar(value="koboldcpp")
27692847
horde_gen_var = ctk.StringVar(value=maxhordelen)
27702848
horde_context_var = ctk.StringVar(value=maxhordectx)
@@ -3274,6 +3352,7 @@ def pickpremadetemplate():
32743352
makecheckbox(network_tab, "Quiet Mode", quietmode, 4,tooltiptxt="Prevents all generation related terminal output from being displayed.")
32753353
makecheckbox(network_tab, "NoCertify Mode (Insecure)", nocertifymode, 4, 1,tooltiptxt="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.")
32763354
makecheckbox(network_tab, "Shared Multiplayer", multiplayer_var, 5,tooltiptxt="Hosts a shared multiplayer session that others can join.")
3355+
makecheckbox(network_tab, "Enable WebSearch", websearch_var, 5, 1,tooltiptxt="Enable the local search engine proxy so Web Searches can be done.")
32773356

32783357
makefileentry(network_tab, "SSL Cert:", "Select SSL cert.pem file",ssl_cert_var, 7, width=200 ,filetypes=[("Unencrypted Certificate PEM", "*.pem")], singlerow=True, singlecol=False,tooltiptxt="Select your unencrypted .pem SSL certificate file for https.\nCan be generated with OpenSSL.")
32793358
makefileentry(network_tab, "SSL Key:", "Select SSL key.pem file", ssl_key_var, 9, width=200, filetypes=[("Unencrypted Key PEM", "*.pem")], singlerow=True, singlecol=False, tooltiptxt="Select your unencrypted .pem SSL key file for https.\nCan be generated with OpenSSL.")
@@ -3523,6 +3602,7 @@ def export_vars():
35233602
args.host = host_var.get()
35243603
args.multiuser = multiuser_var.get()
35253604
args.multiplayer = (multiplayer_var.get()==1)
3605+
args.websearch = (websearch_var.get()==1)
35263606

35273607
if usehorde_var.get() != 0:
35283608
args.hordemodelname = horde_name_var.get()
@@ -3700,6 +3780,7 @@ def import_vars(dict):
37003780
host_var.set(dict["host"] if ("host" in dict and dict["host"]) else "")
37013781
multiuser_var.set(dict["multiuser"] if ("multiuser" in dict) else 1)
37023782
multiplayer_var.set(dict["multiplayer"] if ("multiplayer" in dict) else 0)
3783+
websearch_var.set(dict["websearch"] if ("websearch" in dict) else 0)
37033784

37043785
horde_name_var.set(dict["hordemodelname"] if ("hordemodelname" in dict and dict["hordemodelname"]) else "koboldcpp")
37053786
horde_context_var.set(dict["hordemaxctx"] if ("hordemaxctx" in dict and dict["hordemaxctx"]) else maxhordectx)
@@ -4984,6 +5065,7 @@ def range_checker(arg: str):
49845065
advparser.add_argument("--promptlimit", help="Sets the maximum number of generated tokens, usable only with --prompt or --benchmark",metavar=('[token limit]'), type=int, default=100)
49855066
advparser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", metavar=('limit'), nargs='?', const=1, type=int, default=1)
49865067
advparser.add_argument("--multiplayer", help="Hosts a shared multiplayer session that others can join.", action='store_true')
5068+
advparser.add_argument("--websearch", help="Enable the local search engine proxy so Web Searches can be done.", action='store_true')
49875069
advparser.add_argument("--remotetunnel", help="Uses Cloudflare to create a remote tunnel, allowing you to access koboldcpp remotely over the internet even behind a firewall.", action='store_true')
49885070
advparser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true')
49895071
advparser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true')

0 commit comments

Comments
 (0)