|
23 | 23 | import asyncio |
24 | 24 | import socket |
25 | 25 | import threading |
| 26 | +import html |
| 27 | +import urllib.parse as urlparse |
26 | 28 | from concurrent.futures import ThreadPoolExecutor |
27 | 29 | from datetime import datetime, timezone |
28 | 30 |
|
@@ -1268,6 +1270,71 @@ def detokenize_ids(tokids): |
1268 | 1270 | detokstr = ctypes.string_at(detok).decode("UTF-8","ignore") |
1269 | 1271 | return detokstr |
1270 | 1272 |
|
# Performs a web search using DuckDuckGo and extracts text content from the top results.
def websearch(query, num_results=3, timeout=10):
    """Query DuckDuckGo's HTML endpoint and return text for the first hits.

    Args:
        query: Search string. Falsy or whitespace-only values short-circuit
            to an empty list without touching the network.
        num_results: Maximum number of entries to return (default 3, the
            value that used to be hard-coded).
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled connection cannot block the calling thread forever.

    Returns:
        A list of strings, one per search hit. Each string is the text of
        the hit's ``result__a`` anchor, then its ``result__url`` anchor, then
        its ``result__snippet`` anchor, joined by newlines. An empty list is
        returned for an empty query or when the fetch/parse fails; the
        failure is printed, never raised.
    """
    if not query or not str(query).strip():
        return []

    # Kept function-local so the block does not depend on module-level imports.
    import urllib.parse
    import urllib.request
    from html.parser import HTMLParser

    class ExtractResultsParser(HTMLParser):
        """Collects the text of result anchors from the DuckDuckGo HTML page."""

        def __init__(self):
            super().__init__()
            self.results = []            # finished entries, in page order
            self.recordingTitle = False  # inside a result__a / result__url anchor
            self.recordingDesc = False   # inside a result__snippet anchor
            self.currentrytxt = ""       # entry being assembled
            self.currsegmenttxt = ""     # text of the anchor currently open

        def handle_starttag(self, tag, attrs):
            if tag != "a":
                return
            for attr_name, attr_value in attrs:
                if attr_name != "class":
                    continue
                # A valueless attribute (<a class>) is reported as None.
                classes = (attr_value or "").split()
                if not self.recordingTitle:
                    if "result__a" in classes:
                        # A result__a anchor starts a fresh entry.
                        self.recordingTitle = True
                        self.currentrytxt = ""
                        self.currsegmenttxt = ""
                    elif "result__url" in classes:
                        # The URL anchor is appended to the entry like a title line.
                        self.recordingTitle = True
                        self.currsegmenttxt = ""
                if not self.recordingDesc and "result__snippet" in classes:
                    self.recordingDesc = True
                    self.currsegmenttxt = ""

        def handle_endtag(self, tag):
            if tag != "a":
                return
            if self.recordingTitle:
                self.recordingTitle = False
                self.currentrytxt += self.currsegmenttxt.strip() + "\n"
                self.currsegmenttxt = ""
            if self.recordingDesc:
                # The snippet anchor closes the entry; emit it.
                self.recordingDesc = False
                self.currentrytxt += self.currsegmenttxt.strip()
                self.currsegmenttxt = ""
                if self.currentrytxt != "":
                    self.results.append(self.currentrytxt.strip())
                    self.currentrytxt = ""

        def handle_data(self, data):
            if self.recordingTitle or self.recordingDesc:
                self.currsegmenttxt += data

    encoded_query = urllib.parse.quote(query)
    search_url = f"https://html.duckduckgo.com/html/?q={encoded_query}"

    try:
        req = urllib.request.Request(search_url, headers={'User-Agent': 'Mozilla/5.0'})
        with urllib.request.urlopen(req, timeout=timeout) as response:
            search_html = response.read().decode('utf-8', errors='ignore')
        parser = ExtractResultsParser()
        parser.feed(search_html)
        parser.close()  # flush any text still buffered by the parser
        return parser.results[:max(num_results, 0)]
    except Exception as e:
        # Best-effort boundary: a failed search must not take down the request
        # handler, and callers always get a list back (previously "" was returned).
        print(f"Error fetching URL {search_url}: {e}")
        return []
| 1337 | + |
1271 | 1338 | ################################################################# |
1272 | 1339 | ### A hacky simple HTTP server simulating a kobold api by Concedo |
1273 | 1340 | ### we are intentionally NOT using flask, because we want MINIMAL dependencies |
@@ -1797,8 +1864,6 @@ def secure_endpoint(self): #returns false if auth fails. caller should exit |
1797 | 1864 |
|
1798 | 1865 | def noscript_webui(self): |
1799 | 1866 | global modelbusy, sslvalid |
1800 | | - import html |
1801 | | - import urllib.parse as urlparse |
1802 | 1867 | parsed_url = urlparse.urlparse(self.path) |
1803 | 1868 | parsed_dict = urlparse.parse_qs(parsed_url.query) |
1804 | 1869 | reply = "" |
@@ -2022,6 +2087,18 @@ def do_GET(self): |
2022 | 2087 | }, |
2023 | 2088 | }).encode()) |
2024 | 2089 |
|
| 2090 | + elif self.path.startswith(("/websearch")): |
| 2091 | + if args.websearch: |
| 2092 | + parsed_url = urlparse.urlparse(self.path) |
| 2093 | + parsed_dict = urlparse.parse_qs(parsed_url.query) |
| 2094 | + searchstr = (parsed_dict['q'][0]) if 'q' in parsed_dict else "" |
| 2095 | + if args.debugmode: |
| 2096 | + print(f"Searching web for: {searchstr}") |
| 2097 | + searchres = websearch(searchstr) |
| 2098 | + response_body = (json.dumps(searchres).encode()) |
| 2099 | + else: |
| 2100 | + response_body = (json.dumps([]).encode()) |
| 2101 | + |
2025 | 2102 | elif self.path=="/api" or self.path=="/docs" or self.path.startswith(('/api/?json=','/api?json=','/docs/?json=','/docs?json=')): |
2026 | 2103 | content_type = 'text/html' |
2027 | 2104 | if embedded_kcpp_docs is None: |
@@ -2765,6 +2842,7 @@ def hide_tooltip(event): |
2765 | 2842 | host_var = ctk.StringVar(value="") |
2766 | 2843 | multiuser_var = ctk.IntVar(value=1) |
2767 | 2844 | multiplayer_var = ctk.IntVar(value=has_multiplayer) |
| 2845 | + websearch_var = ctk.IntVar(value=0) |
2768 | 2846 | horde_name_var = ctk.StringVar(value="koboldcpp") |
2769 | 2847 | horde_gen_var = ctk.StringVar(value=maxhordelen) |
2770 | 2848 | horde_context_var = ctk.StringVar(value=maxhordectx) |
@@ -3274,6 +3352,7 @@ def pickpremadetemplate(): |
3274 | 3352 | makecheckbox(network_tab, "Quiet Mode", quietmode, 4,tooltiptxt="Prevents all generation related terminal output from being displayed.") |
3275 | 3353 | makecheckbox(network_tab, "NoCertify Mode (Insecure)", nocertifymode, 4, 1,tooltiptxt="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.") |
3276 | 3354 | makecheckbox(network_tab, "Shared Multiplayer", multiplayer_var, 5,tooltiptxt="Hosts a shared multiplayer session that others can join.") |
| 3355 | + makecheckbox(network_tab, "Enable WebSearch", websearch_var, 5, 1,tooltiptxt="Enable the local search engine proxy so Web Searches can be done.") |
3277 | 3356 |
|
3278 | 3357 | makefileentry(network_tab, "SSL Cert:", "Select SSL cert.pem file",ssl_cert_var, 7, width=200 ,filetypes=[("Unencrypted Certificate PEM", "*.pem")], singlerow=True, singlecol=False,tooltiptxt="Select your unencrypted .pem SSL certificate file for https.\nCan be generated with OpenSSL.") |
3279 | 3358 | makefileentry(network_tab, "SSL Key:", "Select SSL key.pem file", ssl_key_var, 9, width=200, filetypes=[("Unencrypted Key PEM", "*.pem")], singlerow=True, singlecol=False, tooltiptxt="Select your unencrypted .pem SSL key file for https.\nCan be generated with OpenSSL.") |
@@ -3523,6 +3602,7 @@ def export_vars(): |
3523 | 3602 | args.host = host_var.get() |
3524 | 3603 | args.multiuser = multiuser_var.get() |
3525 | 3604 | args.multiplayer = (multiplayer_var.get()==1) |
| 3605 | + args.websearch = (websearch_var.get()==1) |
3526 | 3606 |
|
3527 | 3607 | if usehorde_var.get() != 0: |
3528 | 3608 | args.hordemodelname = horde_name_var.get() |
@@ -3700,6 +3780,7 @@ def import_vars(dict): |
3700 | 3780 | host_var.set(dict["host"] if ("host" in dict and dict["host"]) else "") |
3701 | 3781 | multiuser_var.set(dict["multiuser"] if ("multiuser" in dict) else 1) |
3702 | 3782 | multiplayer_var.set(dict["multiplayer"] if ("multiplayer" in dict) else 0) |
| 3783 | + websearch_var.set(dict["websearch"] if ("websearch" in dict) else 0) |
3703 | 3784 |
|
3704 | 3785 | horde_name_var.set(dict["hordemodelname"] if ("hordemodelname" in dict and dict["hordemodelname"]) else "koboldcpp") |
3705 | 3786 | horde_context_var.set(dict["hordemaxctx"] if ("hordemaxctx" in dict and dict["hordemaxctx"]) else maxhordectx) |
@@ -4984,6 +5065,7 @@ def range_checker(arg: str): |
4984 | 5065 | advparser.add_argument("--promptlimit", help="Sets the maximum number of generated tokens, usable only with --prompt or --benchmark",metavar=('[token limit]'), type=int, default=100) |
4985 | 5066 | advparser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", metavar=('limit'), nargs='?', const=1, type=int, default=1) |
4986 | 5067 | advparser.add_argument("--multiplayer", help="Hosts a shared multiplayer session that others can join.", action='store_true') |
| 5068 | + advparser.add_argument("--websearch", help="Enable the local search engine proxy so Web Searches can be done.", action='store_true') |
4987 | 5069 | advparser.add_argument("--remotetunnel", help="Uses Cloudflare to create a remote tunnel, allowing you to access koboldcpp remotely over the internet even behind a firewall.", action='store_true') |
4988 | 5070 | advparser.add_argument("--highpriority", help="Experimental flag. If set, increases the process CPU priority, potentially speeding up generation. Use caution.", action='store_true') |
4989 | 5071 | advparser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true') |
|
0 commit comments