Skip to content

Commit 8d4e833

Browse files
committed
Move proxy helpers into a dedicated module.
1 parent 5c809b0 commit 8d4e833

File tree

6 files changed

+393
-328
lines changed

6 files changed

+393
-328
lines changed

src/websockets/asyncio/client.py

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from typing import Any, Callable, Literal, cast
1313

1414
from ..client import ClientProtocol, backoff
15-
from ..datastructures import Headers, HeadersLike
15+
from ..datastructures import HeadersLike
1616
from ..exceptions import (
1717
InvalidMessage,
1818
InvalidProxyMessage,
@@ -23,12 +23,13 @@
2323
)
2424
from ..extensions.base import ClientExtensionFactory
2525
from ..extensions.permessage_deflate import enable_client_permessage_deflate
26-
from ..headers import build_authorization_basic, build_host, validate_subprotocols
26+
from ..headers import validate_subprotocols
2727
from ..http11 import USER_AGENT, Response
2828
from ..protocol import CONNECTING, Event
29+
from ..proxy import Proxy, get_proxy, parse_proxy, prepare_connect_request
2930
from ..streams import StreamReader
3031
from ..typing import LoggerLike, Origin, Subprotocol
31-
from ..uri import Proxy, WebSocketURI, get_proxy, parse_proxy, parse_uri
32+
from ..uri import WebSocketURI, parse_uri
3233
from .compatibility import TimeoutError, asyncio_timeout
3334
from .connection import Connection
3435

@@ -721,25 +722,6 @@ async def connect_socks_proxy(
721722
raise ProxyError("failed to connect to SOCKS proxy") from exc
722723

723724

724-
def prepare_connect_request(
725-
proxy: Proxy,
726-
ws_uri: WebSocketURI,
727-
user_agent_header: str | None = None,
728-
) -> bytes:
729-
host = build_host(ws_uri.host, ws_uri.port, ws_uri.secure, always_include_port=True)
730-
headers = Headers()
731-
headers["Host"] = build_host(ws_uri.host, ws_uri.port, ws_uri.secure)
732-
if user_agent_header is not None:
733-
headers["User-Agent"] = user_agent_header
734-
if proxy.username is not None:
735-
assert proxy.password is not None # enforced by parse_proxy()
736-
headers["Proxy-Authorization"] = build_authorization_basic(
737-
proxy.username, proxy.password
738-
)
739-
# We cannot use the Request class because it supports only GET requests.
740-
return f"CONNECT {host} HTTP/1.1\r\n".encode() + headers.serialize()
741-
742-
743725
class HTTPProxyConnection(asyncio.Protocol):
744726
def __init__(
745727
self,

src/websockets/proxy.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
from __future__ import annotations
2+
3+
import dataclasses
4+
import urllib.parse
5+
import urllib.request
6+
7+
from .datastructures import Headers
8+
from .exceptions import InvalidProxy
9+
from .headers import build_authorization_basic, build_host
10+
from .http11 import USER_AGENT
11+
from .uri import DELIMS, WebSocketURI
12+
13+
14+
__all__ = ["get_proxy", "parse_proxy", "Proxy"]
15+
16+
17+
@dataclasses.dataclass
class Proxy:
    """
    Proxy address.

    Attributes:
        scheme: ``"socks5h"``, ``"socks5"``, ``"socks4a"``, ``"socks4"``,
            ``"https"``, or ``"http"``.
        host: Normalized to lower case.
        port: Always set even if it's the default.
        username: Available when the proxy address contains `User Information`_.
        password: Available when the proxy address contains `User Information`_.

    .. _User Information: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1

    """

    scheme: str
    host: str
    port: int
    username: str | None = None
    password: str | None = None

    @property
    def user_info(self) -> tuple[str, str] | None:
        """
        Return the credentials as a ``(username, password)`` pair.

        Returns:
            :obj:`None` when no username is set, else both credentials.

        """
        if self.username is None:
            return None
        # A username without a password is an internal invariant violation,
        # not a user error, hence the assertion rather than an exception.
        assert self.password is not None
        return (self.username, self.password)
46+
47+
48+
def parse_proxy(proxy: str) -> Proxy:
49+
"""
50+
Parse and validate a proxy.
51+
52+
Args:
53+
proxy: proxy.
54+
55+
Returns:
56+
Parsed proxy.
57+
58+
Raises:
59+
InvalidProxy: If ``proxy`` isn't a valid proxy.
60+
61+
"""
62+
parsed = urllib.parse.urlparse(proxy)
63+
if parsed.scheme not in ["socks5h", "socks5", "socks4a", "socks4", "https", "http"]:
64+
raise InvalidProxy(proxy, f"scheme {parsed.scheme} isn't supported")
65+
if parsed.hostname is None:
66+
raise InvalidProxy(proxy, "hostname isn't provided")
67+
if parsed.path not in ["", "/"]:
68+
raise InvalidProxy(proxy, "path is meaningless")
69+
if parsed.query != "":
70+
raise InvalidProxy(proxy, "query is meaningless")
71+
if parsed.fragment != "":
72+
raise InvalidProxy(proxy, "fragment is meaningless")
73+
74+
scheme = parsed.scheme
75+
host = parsed.hostname
76+
port = parsed.port or (443 if parsed.scheme == "https" else 80)
77+
username = parsed.username
78+
password = parsed.password
79+
# urllib.parse.urlparse accepts URLs with a username but without a
80+
# password. This doesn't make sense for HTTP Basic Auth credentials.
81+
if username is not None and password is None:
82+
raise InvalidProxy(proxy, "username provided without password")
83+
84+
try:
85+
proxy.encode("ascii")
86+
except UnicodeEncodeError:
87+
# Input contains non-ASCII characters.
88+
# It must be an IRI. Convert it to a URI.
89+
host = host.encode("idna").decode()
90+
if username is not None:
91+
assert password is not None
92+
username = urllib.parse.quote(username, safe=DELIMS)
93+
password = urllib.parse.quote(password, safe=DELIMS)
94+
95+
return Proxy(scheme, host, port, username, password)
96+
97+
98+
def get_proxy(uri: WebSocketURI) -> str | None:
    """
    Return the proxy to use for connecting to the given WebSocket URI, if any.

    Args:
        uri: WebSocket URI of the server to connect to.

    Returns:
        Proxy address, or :obj:`None` when the host bypasses proxies or no
        suitable proxy is configured.

    """
    if urllib.request.proxy_bypass(f"{uri.host}:{uri.port}"):
        return None

    # According to the _Proxy Usage_ section of RFC 6455, use a SOCKS5 proxy if
    # available, else favor the proxy for HTTPS connections over the proxy for
    # HTTP connections.

    # The priority of a proxy for WebSocket connections is unspecified. We give
    # it the highest priority. This makes it easy to configure a specific proxy
    # for websockets.

    # getproxies() may return SOCKS proxies as {"socks": "http://host:port"} or
    # as {"https": "socks5h://host:port"} depending on whether they're declared
    # in the operating system or in environment variables.

    proxies = urllib.request.getproxies()
    if uri.secure:
        schemes = ["wss", "socks", "https"]
    else:
        schemes = ["ws", "socks", "https", "http"]

    # Schemes are listed by decreasing priority: the first match wins.
    for scheme in schemes:
        proxy = proxies.get(scheme)
        if proxy is None:
            continue
        if scheme == "socks" and proxy.startswith("http://"):
            # Rewrite the placeholder "http://" scheme of a proxy declared
            # under the "socks" key to a SOCKS scheme.
            proxy = "socks5h://" + proxy[len("http://") :]
        return proxy

    return None
132+
133+
134+
def prepare_connect_request(
    proxy: Proxy,
    ws_uri: WebSocketURI,
    user_agent_header: str | None = USER_AGENT,
) -> bytes:
    """
    Build the HTTP ``CONNECT`` request sent to an HTTP(S) proxy.

    Args:
        proxy: Proxy to connect through; its credentials, if any, are sent
            in a ``Proxy-Authorization`` header.
        ws_uri: WebSocket URI of the server to reach through the proxy.
        user_agent_header: Value of the ``User-Agent`` header;
            :obj:`None` removes the header.

    Returns:
        Serialized request: request line followed by headers.

    """
    # The request target always carries an explicit port. The Host header is
    # built without that flag -- presumably omitting a default port; confirm
    # against build_host().
    host = build_host(ws_uri.host, ws_uri.port, ws_uri.secure, always_include_port=True)
    headers = Headers()
    headers["Host"] = build_host(ws_uri.host, ws_uri.port, ws_uri.secure)
    if user_agent_header is not None:
        headers["User-Agent"] = user_agent_header
    if proxy.username is not None:
        assert proxy.password is not None  # enforced by parse_proxy()
        headers["Proxy-Authorization"] = build_authorization_basic(
            proxy.username, proxy.password
        )
    # We cannot use the Request class because it supports only GET requests.
    return f"CONNECT {host} HTTP/1.1\r\n".encode() + headers.serialize()

src/websockets/sync/client.py

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,17 @@
88
from typing import Any, Callable, Literal, TypeVar, cast
99

1010
from ..client import ClientProtocol
11-
from ..datastructures import Headers, HeadersLike
11+
from ..datastructures import HeadersLike
1212
from ..exceptions import InvalidProxyMessage, InvalidProxyStatus, ProxyError
1313
from ..extensions.base import ClientExtensionFactory
1414
from ..extensions.permessage_deflate import enable_client_permessage_deflate
15-
from ..headers import build_authorization_basic, build_host, validate_subprotocols
15+
from ..headers import validate_subprotocols
1616
from ..http11 import USER_AGENT, Response
1717
from ..protocol import CONNECTING, Event
18+
from ..proxy import Proxy, get_proxy, parse_proxy, prepare_connect_request
1819
from ..streams import StreamReader
1920
from ..typing import BytesLike, LoggerLike, Origin, Subprotocol
20-
from ..uri import Proxy, WebSocketURI, get_proxy, parse_proxy, parse_uri
21+
from ..uri import WebSocketURI, parse_uri
2122
from .connection import Connection
2223
from .utils import Deadline
2324

@@ -476,25 +477,6 @@ def connect_socks_proxy(
476477
raise ProxyError("failed to connect to SOCKS proxy") from exc
477478

478479

479-
def prepare_connect_request(
480-
proxy: Proxy,
481-
ws_uri: WebSocketURI,
482-
user_agent_header: str | None = None,
483-
) -> bytes:
484-
host = build_host(ws_uri.host, ws_uri.port, ws_uri.secure, always_include_port=True)
485-
headers = Headers()
486-
headers["Host"] = build_host(ws_uri.host, ws_uri.port, ws_uri.secure)
487-
if user_agent_header is not None:
488-
headers["User-Agent"] = user_agent_header
489-
if proxy.username is not None:
490-
assert proxy.password is not None # enforced by parse_proxy()
491-
headers["Proxy-Authorization"] = build_authorization_basic(
492-
proxy.username, proxy.password
493-
)
494-
# We cannot use the Request class because it supports only GET requests.
495-
return f"CONNECT {host} HTTP/1.1\r\n".encode() + headers.serialize()
496-
497-
498480
def read_connect_response(sock: socket.socket, deadline: Deadline) -> Response:
499481
reader = StreamReader()
500482
parser = Response.parse(

src/websockets/uri.py

Lines changed: 1 addition & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22

33
import dataclasses
44
import urllib.parse
5-
import urllib.request
65

7-
from .exceptions import InvalidProxy, InvalidURI
6+
from .exceptions import InvalidURI
87

98

109
__all__ = ["parse_uri", "WebSocketURI"]
@@ -106,120 +105,3 @@ def parse_uri(uri: str) -> WebSocketURI:
106105
password = urllib.parse.quote(password, safe=DELIMS)
107106

108107
return WebSocketURI(secure, host, port, path, query, username, password)
109-
110-
111-
@dataclasses.dataclass
112-
class Proxy:
113-
"""
114-
Proxy.
115-
116-
Attributes:
117-
scheme: ``"socks5h"``, ``"socks5"``, ``"socks4a"``, ``"socks4"``,
118-
``"https"``, or ``"http"``.
119-
host: Normalized to lower case.
120-
port: Always set even if it's the default.
121-
username: Available when the proxy address contains `User Information`_.
122-
password: Available when the proxy address contains `User Information`_.
123-
124-
.. _User Information: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1
125-
126-
"""
127-
128-
scheme: str
129-
host: str
130-
port: int
131-
username: str | None = None
132-
password: str | None = None
133-
134-
@property
135-
def user_info(self) -> tuple[str, str] | None:
136-
if self.username is None:
137-
return None
138-
assert self.password is not None
139-
return (self.username, self.password)
140-
141-
142-
def parse_proxy(proxy: str) -> Proxy:
143-
"""
144-
Parse and validate a proxy.
145-
146-
Args:
147-
proxy: proxy.
148-
149-
Returns:
150-
Parsed proxy.
151-
152-
Raises:
153-
InvalidProxy: If ``proxy`` isn't a valid proxy.
154-
155-
"""
156-
parsed = urllib.parse.urlparse(proxy)
157-
if parsed.scheme not in ["socks5h", "socks5", "socks4a", "socks4", "https", "http"]:
158-
raise InvalidProxy(proxy, f"scheme {parsed.scheme} isn't supported")
159-
if parsed.hostname is None:
160-
raise InvalidProxy(proxy, "hostname isn't provided")
161-
if parsed.path not in ["", "/"]:
162-
raise InvalidProxy(proxy, "path is meaningless")
163-
if parsed.query != "":
164-
raise InvalidProxy(proxy, "query is meaningless")
165-
if parsed.fragment != "":
166-
raise InvalidProxy(proxy, "fragment is meaningless")
167-
168-
scheme = parsed.scheme
169-
host = parsed.hostname
170-
port = parsed.port or (443 if parsed.scheme == "https" else 80)
171-
username = parsed.username
172-
password = parsed.password
173-
# urllib.parse.urlparse accepts URLs with a username but without a
174-
# password. This doesn't make sense for HTTP Basic Auth credentials.
175-
if username is not None and password is None:
176-
raise InvalidProxy(proxy, "username provided without password")
177-
178-
try:
179-
proxy.encode("ascii")
180-
except UnicodeEncodeError:
181-
# Input contains non-ASCII characters.
182-
# It must be an IRI. Convert it to a URI.
183-
host = host.encode("idna").decode()
184-
if username is not None:
185-
assert password is not None
186-
username = urllib.parse.quote(username, safe=DELIMS)
187-
password = urllib.parse.quote(password, safe=DELIMS)
188-
189-
return Proxy(scheme, host, port, username, password)
190-
191-
192-
def get_proxy(uri: WebSocketURI) -> str | None:
193-
"""
194-
Return the proxy to use for connecting to the given WebSocket URI, if any.
195-
196-
"""
197-
if urllib.request.proxy_bypass(f"{uri.host}:{uri.port}"):
198-
return None
199-
200-
# According to the _Proxy Usage_ section of RFC 6455, use a SOCKS5 proxy if
201-
# available, else favor the proxy for HTTPS connections over the proxy for
202-
# HTTP connections.
203-
204-
# The priority of a proxy for WebSocket connections is unspecified. We give
205-
# it the highest priority. This makes it easy to configure a specific proxy
206-
# for websockets.
207-
208-
# getproxies() may return SOCKS proxies as {"socks": "http://host:port"} or
209-
# as {"https": "socks5h://host:port"} depending on whether they're declared
210-
# in the operating system or in environment variables.
211-
212-
proxies = urllib.request.getproxies()
213-
if uri.secure:
214-
schemes = ["wss", "socks", "https"]
215-
else:
216-
schemes = ["ws", "socks", "https", "http"]
217-
218-
for scheme in schemes:
219-
proxy = proxies.get(scheme)
220-
if proxy is not None:
221-
if scheme == "socks" and proxy.startswith("http://"):
222-
proxy = "socks5h://" + proxy[7:]
223-
return proxy
224-
else:
225-
return None

0 commit comments

Comments
 (0)