1010
1111def search_on_web (query : str , search_engine : str = "Google" ,
1212 max_results : int = 10 , port : int = 8080 ,
13- timeout : int = 10 ) -> List [str ]:
13+ timeout : int = 10 , proxy : str | dict = None ) -> List [str ]:
1414 """
1515 Searches the web for a given query using specified search
1616 engine options and filters out PDF links.
@@ -23,6 +23,7 @@ def search_on_web(query: str, search_engine: str = "Google",
2323 port (int, optional): The port number to use when searching with 'SearXNG'. Default is 8080.
2424 timeout (int, optional): The number of seconds to wait
2525 for a response from a request. Default is 10 seconds.
26+ proxy (dict or string, optional): The proxy server to use for the request. Default is None.
2627
2728 Returns:
2829 List[str]: A list of URLs as strings that are the search results, excluding any PDF links.
@@ -36,6 +37,22 @@ def search_on_web(query: str, search_engine: str = "Google",
3637 ['http://example.com', 'http://example.org', ...]
3738 """
3839
def format_proxy(proxy):
    """
    Normalizes a proxy specification into a single proxy URL string.

    Args:
        proxy (dict or str): Either a ready-to-use proxy URL string, which is
            returned unchanged, or a dictionary with the keys 'server',
            'username' and 'password'. 'server' may be a bare "host:port"
            or may already carry a scheme (e.g. "https://host:port").

    Returns:
        str: A proxy URL of the form "scheme://username:password@host:port".

    Raises:
        ValueError: If the dictionary lacks a non-empty 'server',
            'username' or 'password'.
        TypeError: If proxy is neither a dictionary nor a string.
    """
    from urllib.parse import quote

    if isinstance(proxy, dict):
        server = proxy.get('server')
        username = proxy.get('username')
        password = proxy.get('password')

        if not all([username, password, server]):
            raise ValueError("Proxy dictionary is missing required fields.")

        # Honour a scheme that is already part of 'server'; blindly
        # prepending "http://" would yield "http://user:pass@http://host".
        scheme, separator, host = str(server).partition('://')
        if not separator:
            scheme, host = 'http', str(server)

        # Percent-encode the credentials so that characters such as '@',
        # ':' or '/' cannot be mistaken for URL delimiters.
        user_part = quote(str(username), safe='')
        password_part = quote(str(password), safe='')
        return f"{scheme}://{user_part}:{password_part}@{host}"

    if isinstance(proxy, str):
        return proxy  # e.g. "https://username:password@ip:port"

    raise TypeError("Proxy should be a dictionary or a string.")
def filter_pdf_links(links: List[str]) -> List[str]:
    """
    Drops every link whose text ends in ".pdf", compared case-insensitively.

    Args:
        links (List[str]): Candidate URLs.

    Returns:
        List[str]: The links that do not end in ".pdf", in their original order.
    """
    kept = []
    for candidate in links:
        if candidate.lower().endswith('.pdf'):
            continue
        kept.append(candidate)
    return kept
5067
68+ if proxy :
69+ proxy = format_proxy (proxy )
70+
5171 if search_engine .lower () == "google" :
5272 res = []
53- for url in google_search (query , stop = max_results ):
73+ for url in google_search (query , num_results = max_results , proxy = proxy ):
5474 res .append (url )
5575 return filter_pdf_links (res )
5676
0 commit comments