diff --git a/src/requests/auth.py b/src/requests/auth.py index c39b645189..240b2a8f13 100644 --- a/src/requests/auth.py +++ b/src/requests/auth.py @@ -12,6 +12,7 @@ import time import warnings from base64 import b64encode +from urllib.parse import quote from ._internal_utils import to_native_string from .compat import basestring, str, urlparse @@ -186,7 +187,18 @@ def sha512_utf8(x): if p_parsed.query: path += f"?{p_parsed.query}" - A1 = f"{self.username}:{realm}:{self.password}" + # Normalize username and password to str. If bytes were passed + # (e.g. 'Ondřej'.encode('utf-8')), decode them so that f-string + # interpolation doesn't produce the repr of the bytes object + # (e.g. "b'Ond\\xc5\\x99ej'"). See GitHub issue #6102. + username = self.username + password = self.password + if isinstance(username, bytes): + username = username.decode("utf-8") + if isinstance(password, bytes): + password = password.decode("utf-8") + + A1 = f"{username}:{realm}:{password}" A2 = f"{method}:{path}" HA1 = hash_utf8(A1) @@ -218,8 +230,19 @@ def sha512_utf8(x): self._thread_local.last_nonce = nonce # XXX should the partial digests be encoded too? + # Per RFC 7616 Section 3.4, if the username can't be encoded + # in ISO-8859-1 (latin-1), use the username* parameter with + # UTF-8 percent-encoding instead. 
+ try: + username.encode("latin-1") + username_field = f'username="{username}"' + except UnicodeEncodeError: + # RFC 7616 / RFC 5987: username*=UTF-8''percent-encoded + encoded_username = quote(username, safe="") + username_field = f"username*=UTF-8''{encoded_username}" + base = ( - f'username="{self.username}", realm="{realm}", nonce="{nonce}", ' + f'{username_field}, realm="{realm}", nonce="{nonce}", ' f'uri="{path}", response="{respdig}"' ) if opaque: diff --git a/tests/test_requests.py b/tests/test_requests.py index 257d9d7ab1..0aa285ced6 100644 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -807,6 +807,84 @@ def test_DIGESTAUTH_QUOTES_QOP_VALUE(self, httpbin): r = requests.get(url, auth=auth) assert '"auth"' in r.request.headers["Authorization"] + def test_DIGESTAUTH_NON_LATIN_USERNAME_STR(self): + """Non-latin str username uses RFC 7616 username* parameter.""" + auth = HTTPDigestAuth("Ondřej", "heslíčko") + auth.init_per_thread_state() + auth._thread_local.chal = { + "realm": "testrealm", + "nonce": "testnonce", + } + header = auth.build_digest_header("GET", "https://example.com/") + assert header is not None + # Should use username* with UTF-8 percent-encoding, not username="..." + assert "username*=UTF-8''" in header + assert "Ond" in header # percent-encoded form includes literal ASCII chars + # Must NOT contain bytes repr like b'...' 
+ assert "b'" not in header + + def test_DIGESTAUTH_NON_LATIN_USERNAME_BYTES(self): + """Non-latin bytes username is decoded and uses username* parameter.""" + auth = HTTPDigestAuth("Ondřej".encode("utf-8"), "heslíčko".encode("utf-8")) + auth.init_per_thread_state() + auth._thread_local.chal = { + "realm": "testrealm", + "nonce": "testnonce", + } + header = auth.build_digest_header("GET", "https://example.com/") + assert header is not None + # Must NOT contain bytes repr like b'Ond\xc5\x99ej' + assert "b'" not in header + assert "\\x" not in header + # Should use username* with UTF-8 percent-encoding + assert "username*=UTF-8''" in header + + def test_DIGESTAUTH_BYTES_AND_STR_PRODUCE_SAME_HASH(self): + """Bytes and str credentials produce the same digest response. + + Uses a challenge without qop to avoid non-deterministic cnonce + values that would make the two responses differ. + """ + chal = { + "realm": "testrealm", + "nonce": "testnonce", + } + + auth_str = HTTPDigestAuth("Сергей", "пароль") + auth_str.init_per_thread_state() + auth_str._thread_local.chal = dict(chal) + header_str = auth_str.build_digest_header("GET", "https://example.com/") + + auth_bytes = HTTPDigestAuth("Сергей".encode("utf-8"), "пароль".encode("utf-8")) + auth_bytes.init_per_thread_state() + auth_bytes._thread_local.chal = dict(chal) + header_bytes = auth_bytes.build_digest_header("GET", "https://example.com/") + + assert header_str is not None + assert header_bytes is not None + + # Extract the response= digest from both headers — they must match + import re as re_mod + + resp_str = re_mod.search(r'response="([^"]+)"', header_str) + resp_bytes = re_mod.search(r'response="([^"]+)"', header_bytes) + assert resp_str and resp_bytes + assert resp_str.group(1) == resp_bytes.group(1) + + def test_DIGESTAUTH_LATIN_USERNAME_USES_STANDARD_FIELD(self): + """Latin-1 compatible username uses standard username= parameter.""" + auth = HTTPDigestAuth("user", "pass") + auth.init_per_thread_state() + 
auth._thread_local.chal = { + "realm": "testrealm", + "nonce": "testnonce", + } + header = auth.build_digest_header("GET", "https://example.com/") + assert header is not None + # Should use standard username="..." (not username*) + assert 'username="user"' in header + assert "username*" not in header + def test_POSTBIN_GET_POST_FILES(self, httpbin): url = httpbin("post") requests.post(url).raise_for_status()