1- """
2- research web module
3- """
41import re
52from typing import List
63from langchain_community .tools import DuckDuckGoSearchResults
74from googlesearch import search as google_search
85import requests
96from bs4 import BeautifulSoup
107
def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10,
                  port: int = 8080, timeout: int = 10) -> List[str]:
    """
    Searches the web for a given query using the specified search engine.

    Args:
        query (str): The search query to find on the internet.
        search_engine (str, optional): The search engine to use; one of 'Google',
            'DuckDuckGo', 'Bing', or 'SearXNG' (case-insensitive). Default is 'Google'.
        max_results (int, optional): The maximum number of search results to return.
        port (int, optional): The port of the local SearXNG instance; only used when
            search_engine is 'SearXNG'. Default is 8080.
        timeout (int, optional): Seconds to wait for the HTTP requests issued by the
            'Bing' and 'SearXNG' backends before giving up. Default is 10.

    Returns:
        List[str]: A list of URLs as strings that are the search results.

    Raises:
        ValueError: If the search engine specified is not supported.
        requests.HTTPError: If the Bing or SearXNG server answers with an error status.

    Example:
        >>> search_on_web("example query", search_engine="Google", max_results=5)
        ['http://example.com', 'http://example.org', ...]
    """
    # Normalise once instead of re-lowering the name in every branch.
    engine = search_engine.lower()

    if engine == "google":
        return list(google_search(query, stop=max_results))

    if engine == "duckduckgo":
        research = DuckDuckGoSearchResults(max_results=max_results)
        res = research.run(query)
        # The tool returns one formatted string; pull the links out of it.
        return re.findall(r'https?://[^\s,\]]+', res)

    if engine == "bing":
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # NOTE(review): the original URL construction was elided by the diff hunk;
        # reconstructed here as the standard Bing search endpoint - confirm.
        response = requests.get(
            "https://www.bing.com/search",
            params={"q": query},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        search_results = []
        for result in soup.find_all('li', class_='b_algo', limit=max_results):
            anchor = result.find('a')
            # Skip result items without a usable link instead of crashing on None.
            if anchor is not None and anchor.get('href'):
                search_results.append(anchor['href'])
        return search_results

    if engine == "searxng":
        url = f"http://localhost:{port}"
        params = {"q": query, "format": "json"}

        response = requests.get(url, params=params, timeout=timeout)
        # Fail loudly on HTTP errors rather than on a confusing JSON/KeyError later.
        response.raise_for_status()

        data = response.json()
        # Each entry in "results" is a dict; callers are promised plain URL strings.
        urls = [entry["url"] for entry in data.get("results", []) if entry.get("url")]
        return urls[:max_results]

    raise ValueError("The only search engines available are DuckDuckGo, Google, Bing, or SearXNG")
0 commit comments