|
2 | 2 |
|
3 | 3 | import argparse |
4 | 4 | import sys |
5 | | -import traceback |
6 | 5 | import time |
7 | | -import random |
8 | 6 | from duckduckgo_search import DDGS |
9 | | -from duckduckgo_search.exceptions import DuckDuckGoSearchException |
10 | 7 |
|
11 | | -def get_random_user_agent(): |
12 | | - """Return a random User-Agent string.""" |
13 | | - user_agents = [ |
14 | | - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', |
15 | | - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', |
16 | | - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/120.0.0.0', |
17 | | - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' |
18 | | - ] |
19 | | - return random.choice(user_agents) |
20 | | - |
def search_with_retry(query, max_results=10, max_retries=3):
    """
    Search using DuckDuckGo and return results with URLs and text snippets.

    Every attempt opens its own ``DDGS`` session. If an attempt raises, the
    error is logged to stderr and the search is retried after a one-second
    pause; once ``max_retries`` attempts have failed, the last exception is
    re-raised to the caller.

    Args:
        query (str): Search query
        max_results (int): Maximum number of results to return
        max_retries (int): Maximum number of attempts; must be at least 1

    Returns:
        list: The materialized output of ``DDGS.text`` (presumably dicts with
        ``href``/``title``/``body`` keys -- confirm against the installed
        duckduckgo_search version), or ``[]`` when nothing was found.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        Exception: Whatever the final failed attempt raised.
    """
    # With zero or negative attempts the loop below would never run and the
    # function would fall through returning None without searching at all.
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    for attempt in range(max_retries):
        try:
            print(f"DEBUG: Searching for query: {query} (attempt {attempt + 1}/{max_retries})",
                  file=sys.stderr)

            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=max_results))

            if not results:
                print("DEBUG: No results found", file=sys.stderr)
                return []

            print(f"DEBUG: Found {len(results)} results", file=sys.stderr)
            return results

        except Exception as e:
            print(f"ERROR: Attempt {attempt + 1}/{max_retries} failed: {str(e)}", file=sys.stderr)
            if attempt < max_retries - 1:  # Not the last attempt: pause, then retry
                print("DEBUG: Waiting 1 second before retry...", file=sys.stderr)
                time.sleep(1)
            else:
                print(f"ERROR: All {max_retries} attempts failed", file=sys.stderr)
                raise
73 | 40 |
|
def format_results(results):
    """Format and print search results.

    Writes one numbered section per result to stdout, starting at 1.

    Args:
        results: Iterable of dict-like objects. Each one is read with
            ``.get`` for the keys ``href`` (URL), ``title`` and ``body``
            (snippet); a missing key is printed as ``N/A``.
    """
    for i, r in enumerate(results, 1):
        print(f"\n=== Result {i} ===")
        print(f"URL: {r.get('href', 'N/A')}")
        print(f"Title: {r.get('title', 'N/A')}")
        print(f"Snippet: {r.get('body', 'N/A')}")
81 | 48 |
|
def search(query, max_results=10, max_retries=3):
    """
    Main search function that handles search with retry mechanism.

    Runs ``search_with_retry`` and, when it yields a non-empty result list,
    prints it with ``format_results``. Any exception raised along the way is
    reported on stderr and the process exits with status 1.

    Args:
        query (str): Search query
        max_results (int): Maximum number of results to return
        max_retries (int): Maximum number of retry attempts
    """
    try:
        results = search_with_retry(query, max_results, max_retries)
        if results:
            format_results(results)

    except Exception as e:
        print(f"ERROR: Search failed: {str(e)}", file=sys.stderr)
        sys.exit(1)
100 | 66 |
|
def main():
    """Parse the command-line arguments and run the search.

    Accepts a positional ``query`` plus the optional ``--max-results`` and
    ``--max-retries`` integers, then passes them to ``search``.
    """
    parser = argparse.ArgumentParser(description="Search using DuckDuckGo API")
    parser.add_argument("query", help="Search query")
    parser.add_argument("--max-results", type=int, default=10,
                        help="Maximum number of results (default: 10)")
    parser.add_argument("--max-retries", type=int, default=3,
                        help="Maximum number of retry attempts (default: 3)")

    args = parser.parse_args()
    search(args.query, args.max_results, args.max_retries)
109 | 77 |
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
0 commit comments