Skip to content

Commit 8c16fd2

Browse files
committed
Add web search processor
1 parent f219ae3 commit 8c16fd2

File tree

3 files changed

+109
-2
lines changed

3 files changed

+109
-2
lines changed

llmstack/config.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,5 @@ use_remote_job_queue="False"
1616
cache_backend="locmem.LocMemCache"
1717
database_name="./llmstack.sqlite"
1818
database_engine="sqlite3"
19-
debug="True"
19+
debug="True"
20+
playwright_url=""

llmstack/play/actors/agent.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1+
import importlib
12
import logging
23
import time
3-
import openai
44
import uuid
55
import orjson as json
66
from typing import Any
@@ -59,6 +59,8 @@ def run(self) -> None:
5959
Message(message_type=MessageType.BEGIN, message=None, message_to=self._id))
6060

6161
def on_receive(self, message: Message) -> Any:
62+
import openai
63+
importlib.reload(openai)
6264
if message.message_type == MessageType.BEGIN and message.message_to == self._id:
6365
logger.info(f'Agent actor {self.actor_urn} started')
6466

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import logging
2+
from enum import Enum
3+
from typing import List
4+
5+
from asgiref.sync import async_to_sync
6+
from pydantic import Field
7+
8+
from llmstack.processors.providers.api_processor_interface import ApiProcessorInterface, ApiProcessorSchema
9+
10+
logger = logging.getLogger(__name__)
11+
12+
13+
class SearchEngine(str, Enum):
    # Search engines selectable in the web search configuration. The ``str``
    # mixin makes every member usable wherever a plain string is expected.
    GOOGLE = 'Google'

    def __str__(self) -> str:
        # Render as the bare value ('Google') instead of 'SearchEngine.GOOGLE'.
        return self.value
18+
19+
20+
class WebSearchConfiguration(ApiProcessorSchema):
    # Settings for the web search processor. Both fields are flagged as
    # advanced parameters.

    # Search engine to query; defaults to Google, the only member defined on
    # SearchEngine.
    search_engine: SearchEngine = Field(
        default=SearchEngine.GOOGLE,
        description='Search engine to use',
        widget='customselect',
        advanced_parameter=True,
    )

    # Upper bound on the number of search results returned.
    k: int = Field(
        default=5,
        description='Number of results to return',
        advanced_parameter=True,
    )
32+
33+
34+
class WebSearchInput(ApiProcessorSchema):
    # Input for the web search processor.

    # Required free-text search query; presented with a textarea widget.
    query: str = Field(
        ...,
        description='Query to search for',
        widget='textarea',
    )
37+
38+
39+
class WebSearchResult(ApiProcessorSchema):
    # One search hit produced by the web search processor.

    # Text content of the result element.
    text: str
    # The ``href`` of the first link found inside the result element.
    source: str
42+
43+
44+
class WebSearchOutput(ApiProcessorSchema):
    # Output of the web search processor.

    # Search hits in the order they were collected; empty by default.
    results: List[WebSearchResult] = Field(
        default=[],
        description='Search results',
    )
47+
48+
49+
class WebSearch(ApiProcessorInterface[WebSearchInput, WebSearchOutput, WebSearchConfiguration]):
    """
    Web search API processor.

    Loads a Google results page in a Playwright-driven Chromium browser and
    returns the text and first link of up to ``k`` result blocks.
    """

    def process_session_data(self, session_data):
        """Load chat history and context from ``session_data``.

        Missing keys fall back to an empty history list and an empty
        context string.
        """
        self._chat_history = session_data.get('chat_history', [])
        self._context = session_data.get('context', '')

    @staticmethod
    def name() -> str:
        """Return the display name of this processor."""
        return 'Web Search'

    @staticmethod
    def slug() -> str:
        """Return the slug identifying this processor."""
        return 'web_search'

    @staticmethod
    def description() -> str:
        """Return a one-line description of this processor."""
        return 'Search the web for answers'

    @staticmethod
    def provider_slug() -> str:
        """Return the slug of the provider this processor belongs to."""
        return 'promptly'

    def process(self) -> dict:
        """Run the search and write the results to the output stream.

        Connects to the browser at ``settings.PLAYWRIGHT_URL`` when that
        setting is present and non-empty; otherwise launches a local
        Chromium instance.

        Returns:
            Whatever ``self._output_stream.finalize()`` returns after a
            single ``WebSearchOutput`` has been written to the stream.
        """
        # Imported at call time, as in the original implementation.
        from urllib.parse import quote_plus

        from django.conf import settings
        from playwright.sync_api import sync_playwright

        output_stream = self._output_stream

        query = self._input.query
        # A negative k would otherwise slice from the end of the result list.
        k = max(self._config.k, 0)

        # Encode the query so characters such as '&', '#', '+' and spaces
        # cannot break or truncate the search URL.
        search_url = f'https://www.google.com/search?q={quote_plus(query)}'

        playwright_url = getattr(settings, 'PLAYWRIGHT_URL', None)

        results = []
        with sync_playwright() as p:
            if playwright_url:
                browser = p.chromium.connect(ws_endpoint=playwright_url)
            else:
                browser = p.chromium.launch()

            try:
                page = browser.new_page()
                page.goto(search_url)
                page.wait_for_selector('div#main')

                for element in page.query_selector_all('div#main div.g'):
                    if len(results) >= k:
                        break

                    # Result blocks without a usable link are skipped rather
                    # than failing the whole search.
                    link = element.query_selector('a')
                    href = link.get_attribute('href') if link else None
                    if not href:
                        continue

                    results.append(WebSearchResult(
                        text=element.text_content() or '',
                        source=href,
                    ))
            finally:
                # Always release the browser (or remote connection), even
                # when navigation or scraping raises.
                browser.close()

        async_to_sync(output_stream.write)(WebSearchOutput(
            results=results
        ))

        output = output_stream.finalize()

        return output

0 commit comments

Comments
 (0)