|
1 | 1 | """ |
2 | | -Module for making the request on the web |
| 2 | +research web module |
3 | 3 | """ |
4 | 4 | import re |
5 | 5 | from typing import List |
6 | 6 | from langchain_community.tools import DuckDuckGoSearchResults |
7 | 7 | from googlesearch import search as google_search |
8 | | - |
| 8 | +import requests |
| 9 | +from bs4 import BeautifulSoup |
9 | 10 |
|
def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10) -> List[str]:
    """
    Searches the web for a given query using specified search engine options.

    Args:
        query (str): The search query to find on the internet.
        search_engine (str, optional): Specifies the search engine to use, options
            include 'Google', 'DuckDuckGo', or 'Bing' (matched case-insensitively).
            Default is 'Google'.
        max_results (int, optional): The maximum number of search results to return.

    Returns:
        List[str]: A list of URLs as strings that are the search results.

    Raises:
        ValueError: If the search engine specified is neither 'Google', 'DuckDuckGo',
            nor 'Bing'.
        requests.RequestException: For the Bing engine, if the HTTP request times out,
            fails, or returns an error status.

    Example:
        >>> search_on_web("example query", search_engine="Google", max_results=5)
        ['http://example.com', 'http://example.org', ...]

    This function allows switching between Google, DuckDuckGo, and Bing to perform
    internet searches, returning a list of result URLs.
    """
    # Normalise once instead of lower-casing in every branch.
    engine = search_engine.lower()

    if engine == "google":
        return list(google_search(query, stop=max_results))

    if engine == "duckduckgo":
        research = DuckDuckGoSearchResults(max_results=max_results)
        res = research.run(query)
        # The tool returns one text blob; pull every URL-looking token out of it.
        return re.findall(r'https?://[^\s,\]]+', res)

    if engine == "bing":
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            )
        }
        # Pass the query through `params` so requests URL-encodes it; interpolating
        # it raw into the URL breaks on spaces, '&', '#', '+', etc.
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(
            "https://www.bing.com/search",
            params={"q": query},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        search_results = []
        for result in soup.find_all('li', class_='b_algo', limit=max_results):
            # Skip result items without a usable link instead of crashing on them.
            anchor = result.find('a', href=True)
            if anchor is not None:
                search_results.append(anchor['href'])
        return search_results

    raise ValueError("The only search engines available are DuckDuckGo, Google, or Bing")
0 commit comments