Skip to content

Commit 2c224a5

Browse files
committed
change proxy rotation
1 parent 9e850d8 commit 2c224a5

File tree

1 file changed

+11
-49
lines changed

1 file changed

+11
-49
lines changed

scrapegraphai/utils/proxy_rotation.py

Lines changed: 11 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -188,59 +188,21 @@ def is_ipv4_address(address: str) -> bool:
188188

189189

190190
def parse_or_search_proxy(proxy: Proxy) -> ProxySettings:
    """Parse a proxy configuration or search for a matching one via broker.

    Args:
        proxy: The proxy configuration to parse or search for.

    Returns:
        A 'playwright' compliant proxy configuration.

    Raises:
        AssertionError: If 'server' is missing from the configuration, or if
            the server is neither an IPv4 address, a domain name, nor the
            literal 'broker'.
        ValueError: If no host can be extracted from the 'server' value.

    Notes:
        - If the proxy server is an IPv4 address or a domain name such as
          'gate.nodemaven.com' (with or without a scheme and port), it is
          assumed to be a proxy server address.
        - If the proxy server is 'broker', a proxy server is searched for
          based on the provided broker criteria.

    Example:
        >>> proxy = {
        ...     "server": "broker",
        ...     "criteria": {
        ...         "anonymous": True,
        ...         "countryset": {"GB", "US"},
        ...         "secure": True,
        ...         "timeout": 5.0,
        ...         "search_outside_if_empty": False
        ...     }
        ... }

        >>> parse_or_search_proxy(proxy)
        {
            "server": "<proxy-server-matching-criteria>",
        }

    Example:
        >>> proxy = {
        ...     "server": "192.168.1.1:8080",
        ...     "username": "<username>",
        ...     "password": "<password>"
        ... }

        >>> parse_or_search_proxy(proxy)
        {
            "server": "192.168.1.1:8080",
            "username": "<username>",
            "password": "<password>"
        }
    """
    # Imported at function scope so this block does not rely on a
    # module-level import being present.
    from urllib.parse import urlparse

    if "server" not in proxy:
        # Raised explicitly rather than via `assert` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError("Missing 'server' field in the proxy configuration.")

    server = proxy["server"]

    # 'broker' is a keyword, not a URL: dispatch before any URL parsing,
    # otherwise it would be rejected as having no hostname.
    if server == "broker":
        return _search_proxy(proxy)

    # urlparse() only fills in `hostname` when the authority part is
    # introduced by "//", so scheme-less values such as "192.168.1.1:8080"
    # or "gate.nodemaven.com:8080" need the marker added first.
    has_authority = "://" in server or server.startswith("//")
    server_address = urlparse(server if has_authority else f"//{server}").hostname

    if server_address is None:
        raise ValueError(f"Invalid proxy server format: {proxy['server']}")

    # Accept both IP addresses and domain names like 'gate.nodemaven.com'
    if is_ipv4_address(server_address) or re.match(
        r"^[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$", server_address
    ):
        return _parse_proxy(proxy)

    raise AssertionError(f"Unknown proxy server type: {proxy['server']}")

0 commit comments

Comments
 (0)