Skip to content

Commit 8bc5899

Browse files
authored
fix proxy. (#1269)
1 parent a2c56d2 commit 8bc5899

File tree

2 files changed

+31
-65
lines changed

2 files changed

+31
-65
lines changed

mcp_servers/duckduckgo_atlas/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ authors = [{ name = "Nick Clyde", email = "nick@clyde.tech" }]
77
requires-python = ">=3.10"
88
dependencies = [
99
"beautifulsoup4>=4.13.3",
10-
"httpx>=0.28.1",
10+
"ddgs>=9.0.0",
11+
"httpx[socks]>=0.28.1",
1112
"mcp[cli]>=1.3.0",
1213
"starlette>=0.46.0",
1314
"uvicorn>=0.34.0",

mcp_servers/duckduckgo_atlas/src/duckduckgo_mcp_server/server.py

Lines changed: 29 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,28 @@
2525
from starlette.types import Receive, Scope, Send
2626
import httpx
2727
from bs4 import BeautifulSoup
28+
from ddgs import DDGS
2829

2930
logger = logging.getLogger(__name__)
3031

3132

3233
def _get_proxy_url() -> Optional[str]:
33-
"""Build proxy URL from PROXY_USERNAME and PROXY_PASSWORD env vars."""
34+
"""Build proxy URL from environment variables.
35+
36+
Env vars:
37+
PROXY_USERNAME, PROXY_PASSWORD – required
38+
PROXY_HOST – default: p.webshare.io
39+
PROXY_PORT – default: 80 ("1080" when the scheme contains "socks")
40+
PROXY_SCHEME – default: http
41+
"""
3442
username = os.environ.get("PROXY_USERNAME")
3543
password = os.environ.get("PROXY_PASSWORD")
36-
if username and password:
37-
return f"http://{username}:{password}@p.webshare.io:80/"
38-
return None
44+
if not (username and password):
45+
return None
46+
host = os.environ.get("PROXY_HOST", "p.webshare.io")
47+
scheme = os.environ.get("PROXY_SCHEME", "http")
48+
port = os.environ.get("PROXY_PORT", "1080" if "socks" in scheme else "80")
49+
return f"{scheme}://{username}:{password}@{host}:{port}"
3950

4051

4152
# DuckDuckGo does not require authentication, but we follow the auth extraction
@@ -87,10 +98,8 @@ class SearchResult:
8798

8899

89100
class DuckDuckGoSearcher:
90-
BASE_URL = "https://html.duckduckgo.com/html"
91-
HEADERS = {
92-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
93-
}
101+
"""Uses the duckduckgo-search library (primp browser impersonation)
102+
to avoid CAPTCHAs that raw httpx requests trigger."""
94103

95104
def format_results_for_llm(self, results: List[SearchResult]) -> str:
96105
if not results:
@@ -111,71 +120,27 @@ async def search(
111120
self, query: str, ctx: Context, max_results: int = 10
112121
) -> List[SearchResult]:
113122
try:
114-
data = {
115-
"q": query,
116-
"b": "",
117-
"kl": "",
118-
}
119-
120123
await ctx.info(f"Searching DuckDuckGo for: {query}")
121124

122125
proxy = _get_proxy_url()
123-
async with httpx.AsyncClient(proxy=proxy) as client:
124-
response = await client.post(
125-
self.BASE_URL, data=data, headers=self.HEADERS, timeout=30.0
126-
)
127-
response.raise_for_status()
128-
129-
soup = BeautifulSoup(response.text, "html.parser")
130-
if not soup:
131-
await ctx.error("Failed to parse HTML response")
132-
return []
133-
134-
results = []
135-
for result in soup.select(".result"):
136-
title_elem = result.select_one(".result__title")
137-
if not title_elem:
138-
continue
139-
140-
link_elem = title_elem.find("a")
141-
if not link_elem:
142-
continue
143-
144-
title = link_elem.get_text(strip=True)
145-
link = link_elem.get("href", "")
146-
147-
if "y.js" in link:
148-
continue
149-
150-
if link.startswith("//duckduckgo.com/l/?uddg="):
151-
link = urllib.parse.unquote(link.split("uddg=")[1].split("&")[0])
152-
153-
snippet_elem = result.select_one(".result__snippet")
154-
snippet = snippet_elem.get_text(strip=True) if snippet_elem else ""
155-
156-
results.append(
157-
SearchResult(
158-
title=title,
159-
link=link,
160-
snippet=snippet,
161-
position=len(results) + 1,
162-
)
126+
ddgs = DDGS(proxy=proxy)
127+
raw_results = ddgs.text(query, max_results=max_results, backend="duckduckgo")
128+
129+
results = [
130+
SearchResult(
131+
title=r.get("title", ""),
132+
link=r.get("href", ""),
133+
snippet=r.get("body", ""),
134+
position=i + 1,
163135
)
164-
165-
if len(results) >= max_results:
166-
break
136+
for i, r in enumerate(raw_results)
137+
]
167138

168139
await ctx.info(f"Successfully found {len(results)} results")
169140
return results
170141

171-
except httpx.TimeoutError:
172-
await ctx.error("Search request timed out")
173-
return []
174-
except httpx.HTTPError as e:
175-
await ctx.error(f"HTTP error occurred: {str(e)}")
176-
return []
177142
except Exception as e:
178-
await ctx.error(f"Unexpected error during search: {str(e)}")
143+
await ctx.error(f"Search error: {e}")
179144
traceback.print_exc(file=sys.stderr)
180145
return []
181146

0 commit comments

Comments
 (0)