@@ -7,18 +7,20 @@
 from googlesearch import search as google_search
 import requests
 from bs4 import BeautifulSoup
+import json
 
 def search_on_web(query: str, search_engine: str = "Google",
                   max_results: int = 10, port: int = 8080,
-                  timeout: int = 10, proxy: str | dict = None) -> List[str]:
+                  timeout: int = 10, proxy: str | dict = None,
+                  serper_api_key: str = None) -> List[str]:
     """Search web function with improved error handling and validation"""
 
     # Input validation
     if not query or not isinstance(query, str):
         raise ValueError("Query must be a non-empty string")
 
     search_engine = search_engine.lower()
-    valid_engines = {"google", "duckduckgo", "bing", "searxng"}
+    valid_engines = {"google", "duckduckgo", "bing", "searxng", "serper"}
     if search_engine not in valid_engines:
         raise ValueError(f"Search engine must be one of: {', '.join(valid_engines)}")
 
@@ -42,7 +44,10 @@ def search_on_web(query: str, search_engine: str = "Google",
 
         elif search_engine == "searxng":
             results = _search_searxng(query, max_results, port, timeout)
-
+
+        elif search_engine == "serper":
+            results = _search_serper(query, max_results, serper_api_key, timeout)
+
         return filter_pdf_links(results)
 
     except requests.Timeout:
@@ -76,6 +81,25 @@ def _search_searxng(query: str, max_results: int, port: int, timeout: int) -> List[str]:
     response.raise_for_status()
     return [result['url'] for result in response.json().get("results", [])[:max_results]]
 
+def _search_serper(query: str, max_results: int, serper_api_key: str, timeout: int) -> List[str]:
+    """Helper function for the Serper API."""
+    if not serper_api_key:
+        raise ValueError("An API key is required for the Serper API.")
+
+    url = "https://google.serper.dev/search"
+    payload = json.dumps({
+        "q": query,
+        "num": max_results
+    })
+    headers = {
+        'X-API-KEY': serper_api_key,
+        'Content-Type': 'application/json'
+    }
+    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
+    response.raise_for_status()
+    return [result.get("link") for result in response.json().get("organic", [])]
+
+
 def format_proxy(proxy):
     if isinstance(proxy, dict):
         server = proxy.get('server')
@@ -102,4 +126,4 @@ def filter_pdf_links(links: List[str]) -> List[str]:
     Returns:
         List[str]: A list of URLs excluding any that end with '.pdf'.
     """
-    return [link for link in links if not link.lower().endswith('.pdf')]
+    return [link for link in links if not link.lower().endswith('.pdf')]
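
For reviewers, a minimal usage sketch of the new engine. This is not part of the diff: the module path and the SERPER_API_KEY environment variable are assumptions for illustration, and a valid key from serper.dev is required.

    import os
    from scrapegraphai.utils.research_web import search_on_web  # assumed module path

    # Hypothetical env var holding a Serper API key
    urls = search_on_web(
        "large language models",
        search_engine="serper",
        max_results=5,
        serper_api_key=os.environ["SERPER_API_KEY"],
    )
    print(urls)  # list of result URLs, with any .pdf links filtered out

Note that max_results is passed to the API as "num", so the result list is capped server-side rather than sliced client-side as in the SearXNG helper.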