Commit b56a9f7

Revert "chore: update proxy-protection (#18736)" (#18739)
1 parent 69b2333 commit b56a9f7

5 files changed: +120 -657 lines changed


tests/unit/test_request.py

Lines changed: 12 additions & 305 deletions
@@ -1,66 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0

-import base64
-import hashlib
-import hmac
-import json
-import time
-
 import pretend
-import pytest

 from warehouse import request


-class TestNormalizeDomain:
-    @pytest.mark.parametrize(
-        ("input_domain", "expected"),
-        [
-            # Lowercase normalization
-            ("PyPi.ORG", "pypi.org"),
-            ("TEST.PyPi.ORG", "test.pypi.org"),
-            ("LOCALHOST", "localhost"),
-            # Trailing dots removal
-            ("pypi.org.", "pypi.org"),
-            ("pypi.org...", "pypi.org"),
-            ("localhost.", "localhost"),
-            # Whitespace handling
-            (" pypi.org ", "pypi.org"),
-            ("\tpypi.org\n", "pypi.org"),
-            (" localhost ", "localhost"),
-            # Mixed normalizations
-            (" TEST.PyPi.ORG. ", "test.pypi.org"),
-            (" LOCALHOST. ", "localhost"),
-            (" 127.0.0.1 ", "127.0.0.1"),
-        ],
-    )
-    def test_domain_normalization(self, input_domain, expected):
-        """Test that domains are properly normalized."""
-        assert request._normalize_domain(input_domain) == expected
-
-    def test_handles_idn_domains(self):
-        """Test that IDN domains are converted to ASCII (punycode)."""
-        # These are different Unicode characters that look similar
-        assert request._normalize_domain("рyрі.org") != "pypi.org"  # Cyrillic chars
-        # The result should be the punycode version
-        assert request._normalize_domain("рyрі.org").startswith("xn--")
-
-    def test_handles_invalid_idn_domains(self):
-        """Test that invalid IDN domains fall back to normalized form."""
-        # Test with invalid Unicode that can't be encoded to IDN
-        # Using a string with invalid surrogate characters
-        invalid_domain = "test\udcff.org"  # Contains an unpaired surrogate
-        result = request._normalize_domain(invalid_domain)
-        # Should return the normalized version without failing
-        assert result == "test\udcff.org"
-
-        # Test with a domain that causes encoding issues
-        # Empty labels are not allowed in IDN
-        invalid_domain2 = "test..org"
-        result2 = request._normalize_domain(invalid_domain2)
-        assert result2 == "test..org"
-
-
 class TestCreateNonce:
     def test_generates_unique_nonces(self):
         """Test that each request gets a unique nonce."""
@@ -93,161 +37,39 @@ def test_nonce_is_url_safe(self):
         assert re.match(r"^[A-Za-z0-9_-]+$", nonce)


-class TestCreateIntegrityToken:
-    def test_creates_valid_token(self):
-        """Test that integrity tokens are created with proper structure."""
-        req = pretend.stub(nonce="test-nonce-123")
-
-        token = request._create_integrity_token(req)
-
-        # Should be base64 encoded
-        assert isinstance(token, str)
-
-        # Should be decodable
-        decoded = base64.b64decode(token).decode("utf-8")
-        token_data = json.loads(decoded)
-
-        # Should have required fields
-        assert "ts" in token_data
-        assert "entropy" in token_data
-        assert "nonce" in token_data
-
-        # Timestamp should be recent
-        current_time = int(time.time())
-        assert abs(token_data["ts"] - current_time) < 5  # Within 5 seconds
-
-        # Entropy should be base64 encoded
-        entropy_bytes = base64.b64decode(token_data["entropy"])
-        assert len(entropy_bytes) == 16
-
-        # Nonce should match
-        assert token_data["nonce"] == "test-nonce-123"
-
-    def test_different_requests_get_different_tokens(self):
-        """Test that different requests get different integrity tokens."""
-        req1 = pretend.stub(nonce="nonce-1")
-        req2 = pretend.stub(nonce="nonce-2")
-
-        token1 = request._create_integrity_token(req1)
-        token2 = request._create_integrity_token(req2)
-
-        assert token1 != token2
-
-        # Even with same nonce, entropy should differ
-        req3 = pretend.stub(nonce="nonce-1")
-        token3 = request._create_integrity_token(req3)
-
-        assert token1 != token3
-
-
 class TestCreateHashedDomains:
-    def test_hashes_domains_with_enhanced_security(self):
-        """Test that domains are hashed using multi-layer approach."""
-        # Create a mock integrity token
-        token_data = {
-            "ts": int(time.time()),
-            "entropy": base64.b64encode(b"test-entropy-123").decode("ascii"),
-            "nonce": "test-nonce-123",
-        }
-        integrity_token = base64.b64encode(
-            json.dumps(token_data, sort_keys=True, separators=(",", ":")).encode(
-                "utf-8"
-            )
-        ).decode("ascii")
-
+    def test_hashes_domains_with_nonce(self):
+        """Test that domains are hashed using the nonce."""
         req = pretend.stub(
             nonce="test-nonce-123",
-            integrity_token=integrity_token,
             registry=pretend.stub(
                 settings={"warehouse.allowed_domains": ["pypi.org", "test.pypi.org"]}
             ),
         )

         hashed = request._create_hashed_domains(req)

-        # Should have pipe separators
-        assert "|" in hashed
-        parts = hashed.split("|")
-
-        # Should have 2 domain hashes + 1 checksum
-        assert len(parts) == 3
-
-        # Each domain hash should be 64 chars (sha256 hex)
-        for i in range(2):
-            assert len(parts[i]) == 64
-            assert all(c in "0123456789abcdef" for c in parts[i])
+        # Should return comma-separated list
+        assert "," in hashed
+        hashes = hashed.split(",")
+        assert len(hashes) == 2

-        # Checksum should be 16 chars
-        assert len(parts[2]) == 16
-        assert all(c in "0123456789abcdef" for c in parts[2])
+        # Each hash should be 64 chars (sha256 hex)
+        for h in hashes:
+            assert len(h) == 64
+            assert all(c in "0123456789abcdef" for c in h)

         # Hashes should be different for different domains
-        assert parts[0] != parts[1]
-
-    def test_domain_normalization_applied(self):
-        """Test that domain normalization is applied before hashing."""
-        token_data = {
-            "ts": int(time.time()),
-            "entropy": base64.b64encode(b"test-entropy-123").decode("ascii"),
-            "nonce": "test-nonce-123",
-        }
-        integrity_token = base64.b64encode(
-            json.dumps(token_data, sort_keys=True, separators=(",", ":")).encode(
-                "utf-8"
-            )
-        ).decode("ascii")
-
-        # Two requests with different domain formats
-        req1 = pretend.stub(
-            nonce="test-nonce-123",
-            integrity_token=integrity_token,
-            registry=pretend.stub(settings={"warehouse.allowed_domains": ["PyPi.ORG"]}),
-        )
-
-        req2 = pretend.stub(
-            nonce="test-nonce-123",
-            integrity_token=integrity_token,
-            registry=pretend.stub(settings={"warehouse.allowed_domains": ["pypi.org"]}),
-        )
-
-        hashed1 = request._create_hashed_domains(req1)
-        hashed2 = request._create_hashed_domains(req2)
-
-        # Should produce same hash despite different case
-        assert hashed1 == hashed2
+        assert hashes[0] != hashes[1]

     def test_different_nonce_produces_different_hashes(self):
         """Test that different nonces produce different hashes for same domain."""
-        token_data1 = {
-            "ts": int(time.time()),
-            "entropy": base64.b64encode(b"entropy-1").decode("ascii"),
-            "nonce": "nonce-1",
-        }
-        integrity_token1 = base64.b64encode(
-            json.dumps(token_data1, sort_keys=True, separators=(",", ":")).encode(
-                "utf-8"
-            )
-        ).decode("ascii")
-
-        token_data2 = {
-            "ts": int(time.time()),
-            "entropy": base64.b64encode(b"entropy-2").decode("ascii"),
-            "nonce": "nonce-2",
-        }
-        integrity_token2 = base64.b64encode(
-            json.dumps(token_data2, sort_keys=True, separators=(",", ":")).encode(
-                "utf-8"
-            )
-        ).decode("ascii")
-
         req1 = pretend.stub(
             nonce="nonce-1",
-            integrity_token=integrity_token1,
             registry=pretend.stub(settings={"warehouse.allowed_domains": ["pypi.org"]}),
         )
         req2 = pretend.stub(
             nonce="nonce-2",
-            integrity_token=integrity_token2,
             registry=pretend.stub(settings={"warehouse.allowed_domains": ["pypi.org"]}),
         )

@@ -256,66 +78,10 @@ def test_different_nonce_produces_different_hashes(self):

         assert hashed1 != hashed2

-    def test_timestamp_affects_hash(self):
-        """Test that timestamp changes affect the hash."""
-        token_data1 = {
-            "ts": int(time.time()),
-            "entropy": base64.b64encode(b"test-entropy").decode("ascii"),
-            "nonce": "test-nonce",
-        }
-
-        token_data2 = {
-            "ts": int(time.time()) + 100,  # 100 seconds later
-            "entropy": base64.b64encode(b"test-entropy").decode("ascii"),
-            "nonce": "test-nonce",
-        }
-
-        integrity_token1 = base64.b64encode(
-            json.dumps(token_data1, sort_keys=True, separators=(",", ":")).encode(
-                "utf-8"
-            )
-        ).decode("ascii")
-
-        integrity_token2 = base64.b64encode(
-            json.dumps(token_data2, sort_keys=True, separators=(",", ":")).encode(
-                "utf-8"
-            )
-        ).decode("ascii")
-
-        req1 = pretend.stub(
-            nonce="test-nonce",
-            integrity_token=integrity_token1,
-            registry=pretend.stub(settings={"warehouse.allowed_domains": ["pypi.org"]}),
-        )
-
-        req2 = pretend.stub(
-            nonce="test-nonce",
-            integrity_token=integrity_token2,
-            registry=pretend.stub(settings={"warehouse.allowed_domains": ["pypi.org"]}),
-        )
-
-        hashed1 = request._create_hashed_domains(req1)
-        hashed2 = request._create_hashed_domains(req2)
-
-        # Hashes should be different due to different timestamps
-        assert hashed1 != hashed2
-
     def test_empty_domains_returns_empty_string(self):
         """Test that empty domain list returns empty string."""
-        token_data = {
-            "ts": int(time.time()),
-            "entropy": base64.b64encode(b"test-entropy").decode("ascii"),
-            "nonce": "test-nonce",
-        }
-        integrity_token = base64.b64encode(
-            json.dumps(token_data, sort_keys=True, separators=(",", ":")).encode(
-                "utf-8"
-            )
-        ).decode("ascii")
-
         req = pretend.stub(
             nonce="test-nonce",
-            integrity_token=integrity_token,
             registry=pretend.stub(settings={"warehouse.allowed_domains": []}),
         )

@@ -324,66 +90,7 @@ def test_empty_domains_returns_empty_string(self):

     def test_no_domains_setting_returns_empty_string(self):
         """Test that missing domains setting returns empty string."""
-        token_data = {
-            "ts": int(time.time()),
-            "entropy": base64.b64encode(b"test-entropy").decode("ascii"),
-            "nonce": "test-nonce",
-        }
-        integrity_token = base64.b64encode(
-            json.dumps(token_data, sort_keys=True, separators=(",", ":")).encode(
-                "utf-8"
-            )
-        ).decode("ascii")
-
-        req = pretend.stub(
-            nonce="test-nonce",
-            integrity_token=integrity_token,
-            registry=pretend.stub(settings={}),
-        )
+        req = pretend.stub(nonce="test-nonce", registry=pretend.stub(settings={}))

         hashed = request._create_hashed_domains(req)
         assert hashed == ""
-
-    def test_checksum_validates_integrity(self):
-        """Test that the checksum properly validates the domain hashes."""
-        token_data = {
-            "ts": int(time.time()),
-            "entropy": base64.b64encode(b"test-entropy-123").decode("ascii"),
-            "nonce": "test-nonce-123",
-        }
-        integrity_token = base64.b64encode(
-            json.dumps(token_data, sort_keys=True, separators=(",", ":")).encode(
-                "utf-8"
-            )
-        ).decode("ascii")
-
-        req = pretend.stub(
-            nonce="test-nonce-123",
-            integrity_token=integrity_token,
-            registry=pretend.stub(
-                settings={"warehouse.allowed_domains": ["pypi.org", "test.pypi.org"]}
-            ),
-        )
-
-        hashed = request._create_hashed_domains(req)
-        parts = hashed.split("|")
-
-        # Verify checksum matches expected format
-        checksum = parts[-1]
-        assert len(checksum) == 16
-
-        # If we change a hash, the checksum should be different
-        # Recalculate checksum with modified hash
-        modified_hashes = parts[:-1]
-        modified_hashes[0] = "0" * 64  # Replace first hash with zeros
-
-        nonce_bytes = b"test-nonce-123"
-        entropy_bytes = b"test-entropy-123"
-
-        all_hashes = "|".join(modified_hashes)
-        new_checksum = hmac.new(
-            nonce_bytes + entropy_bytes, all_hashes.encode("utf-8"), hashlib.sha256
-        ).hexdigest()[:16]
-
-        # Checksums should be different
-        assert new_checksum != checksum
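For reference, the retained tests pin down the post-revert behaviour: _create_hashed_domains returns an empty string when "warehouse.allowed_domains" is missing or empty, and otherwise a comma-separated list of 64-character SHA-256 hex digests that differ per domain and per request nonce. The sketch below is hypothetical, written only to satisfy those assertions; it is not the actual warehouse code. The _create_nonce name is inferred from TestCreateNonce, and the use of secrets.token_urlsafe and of hashing nonce + domain are assumptions.

# Hypothetical sketch only -- not the warehouse implementation.
import hashlib
import secrets


def _create_nonce(request):
    # Unique, URL-safe value per request (tests match ^[A-Za-z0-9_-]+$).
    return secrets.token_urlsafe(16)


def _create_hashed_domains(request):
    # Missing or empty "warehouse.allowed_domains" yields an empty string.
    domains = request.registry.settings.get("warehouse.allowed_domains", [])
    if not domains:
        return ""
    nonce = request.nonce.encode("utf-8")
    # Keying each digest on the nonce makes the hashes differ between requests.
    return ",".join(
        hashlib.sha256(nonce + domain.encode("utf-8")).hexdigest()
        for domain in domains
    )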
