1
1
# SPDX-License-Identifier: Apache-2.0
2
2
3
- import base64
4
- import hashlib
5
- import hmac
6
- import json
7
- import time
8
-
9
3
import pretend
10
- import pytest
11
4
12
5
from warehouse import request
13
6
14
7
15
- class TestNormalizeDomain :
16
- @pytest .mark .parametrize (
17
- ("input_domain" , "expected" ),
18
- [
19
- # Lowercase normalization
20
- ("PyPi.ORG" , "pypi.org" ),
21
- ("TEST.PyPi.ORG" , "test.pypi.org" ),
22
- ("LOCALHOST" , "localhost" ),
23
- # Trailing dots removal
24
- ("pypi.org." , "pypi.org" ),
25
- ("pypi.org..." , "pypi.org" ),
26
- ("localhost." , "localhost" ),
27
- # Whitespace handling
28
- (" pypi.org " , "pypi.org" ),
29
- ("\t pypi.org\n " , "pypi.org" ),
30
- (" localhost " , "localhost" ),
31
- # Mixed normalizations
32
- (" TEST.PyPi.ORG. " , "test.pypi.org" ),
33
- (" LOCALHOST. " , "localhost" ),
34
- (" 127.0.0.1 " , "127.0.0.1" ),
35
- ],
36
- )
37
- def test_domain_normalization (self , input_domain , expected ):
38
- """Test that domains are properly normalized."""
39
- assert request ._normalize_domain (input_domain ) == expected
40
-
41
- def test_handles_idn_domains (self ):
42
- """Test that IDN domains are converted to ASCII (punycode)."""
43
- # These are different Unicode characters that look similar
44
- assert request ._normalize_domain ("рyрі.org" ) != "pypi.org" # Cyrillic chars
45
- # The result should be the punycode version
46
- assert request ._normalize_domain ("рyрі.org" ).startswith ("xn--" )
47
-
48
- def test_handles_invalid_idn_domains (self ):
49
- """Test that invalid IDN domains fall back to normalized form."""
50
- # Test with invalid Unicode that can't be encoded to IDN
51
- # Using a string with invalid surrogate characters
52
- invalid_domain = "test\udcff .org" # Contains an unpaired surrogate
53
- result = request ._normalize_domain (invalid_domain )
54
- # Should return the normalized version without failing
55
- assert result == "test\udcff .org"
56
-
57
- # Test with a domain that causes encoding issues
58
- # Empty labels are not allowed in IDN
59
- invalid_domain2 = "test..org"
60
- result2 = request ._normalize_domain (invalid_domain2 )
61
- assert result2 == "test..org"
62
-
63
-
64
8
class TestCreateNonce :
65
9
def test_generates_unique_nonces (self ):
66
10
"""Test that each request gets a unique nonce."""
@@ -93,161 +37,39 @@ def test_nonce_is_url_safe(self):
93
37
assert re .match (r"^[A-Za-z0-9_-]+$" , nonce )
94
38
95
39
96
- class TestCreateIntegrityToken :
97
- def test_creates_valid_token (self ):
98
- """Test that integrity tokens are created with proper structure."""
99
- req = pretend .stub (nonce = "test-nonce-123" )
100
-
101
- token = request ._create_integrity_token (req )
102
-
103
- # Should be base64 encoded
104
- assert isinstance (token , str )
105
-
106
- # Should be decodable
107
- decoded = base64 .b64decode (token ).decode ("utf-8" )
108
- token_data = json .loads (decoded )
109
-
110
- # Should have required fields
111
- assert "ts" in token_data
112
- assert "entropy" in token_data
113
- assert "nonce" in token_data
114
-
115
- # Timestamp should be recent
116
- current_time = int (time .time ())
117
- assert abs (token_data ["ts" ] - current_time ) < 5 # Within 5 seconds
118
-
119
- # Entropy should be base64 encoded
120
- entropy_bytes = base64 .b64decode (token_data ["entropy" ])
121
- assert len (entropy_bytes ) == 16
122
-
123
- # Nonce should match
124
- assert token_data ["nonce" ] == "test-nonce-123"
125
-
126
- def test_different_requests_get_different_tokens (self ):
127
- """Test that different requests get different integrity tokens."""
128
- req1 = pretend .stub (nonce = "nonce-1" )
129
- req2 = pretend .stub (nonce = "nonce-2" )
130
-
131
- token1 = request ._create_integrity_token (req1 )
132
- token2 = request ._create_integrity_token (req2 )
133
-
134
- assert token1 != token2
135
-
136
- # Even with same nonce, entropy should differ
137
- req3 = pretend .stub (nonce = "nonce-1" )
138
- token3 = request ._create_integrity_token (req3 )
139
-
140
- assert token1 != token3
141
-
142
-
143
40
class TestCreateHashedDomains :
144
- def test_hashes_domains_with_enhanced_security (self ):
145
- """Test that domains are hashed using multi-layer approach."""
146
- # Create a mock integrity token
147
- token_data = {
148
- "ts" : int (time .time ()),
149
- "entropy" : base64 .b64encode (b"test-entropy-123" ).decode ("ascii" ),
150
- "nonce" : "test-nonce-123" ,
151
- }
152
- integrity_token = base64 .b64encode (
153
- json .dumps (token_data , sort_keys = True , separators = ("," , ":" )).encode (
154
- "utf-8"
155
- )
156
- ).decode ("ascii" )
157
-
41
+ def test_hashes_domains_with_nonce (self ):
42
+ """Test that domains are hashed using the nonce."""
158
43
req = pretend .stub (
159
44
nonce = "test-nonce-123" ,
160
- integrity_token = integrity_token ,
161
45
registry = pretend .stub (
162
46
settings = {"warehouse.allowed_domains" : ["pypi.org" , "test.pypi.org" ]}
163
47
),
164
48
)
165
49
166
50
hashed = request ._create_hashed_domains (req )
167
51
168
- # Should have pipe separators
169
- assert "|" in hashed
170
- parts = hashed .split ("|" )
171
-
172
- # Should have 2 domain hashes + 1 checksum
173
- assert len (parts ) == 3
174
-
175
- # Each domain hash should be 64 chars (sha256 hex)
176
- for i in range (2 ):
177
- assert len (parts [i ]) == 64
178
- assert all (c in "0123456789abcdef" for c in parts [i ])
52
+ # Should return comma-separated list
53
+ assert "," in hashed
54
+ hashes = hashed .split ("," )
55
+ assert len (hashes ) == 2
179
56
180
- # Checksum should be 16 chars
181
- assert len (parts [2 ]) == 16
182
- assert all (c in "0123456789abcdef" for c in parts [2 ])
57
+ # Each hash should be 64 chars (sha256 hex)
58
+ for h in hashes :
59
+ assert len (h ) == 64
60
+ assert all (c in "0123456789abcdef" for c in h )
183
61
184
62
# Hashes should be different for different domains
185
- assert parts [0 ] != parts [1 ]
186
-
187
- def test_domain_normalization_applied (self ):
188
- """Test that domain normalization is applied before hashing."""
189
- token_data = {
190
- "ts" : int (time .time ()),
191
- "entropy" : base64 .b64encode (b"test-entropy-123" ).decode ("ascii" ),
192
- "nonce" : "test-nonce-123" ,
193
- }
194
- integrity_token = base64 .b64encode (
195
- json .dumps (token_data , sort_keys = True , separators = ("," , ":" )).encode (
196
- "utf-8"
197
- )
198
- ).decode ("ascii" )
199
-
200
- # Two requests with different domain formats
201
- req1 = pretend .stub (
202
- nonce = "test-nonce-123" ,
203
- integrity_token = integrity_token ,
204
- registry = pretend .stub (settings = {"warehouse.allowed_domains" : ["PyPi.ORG" ]}),
205
- )
206
-
207
- req2 = pretend .stub (
208
- nonce = "test-nonce-123" ,
209
- integrity_token = integrity_token ,
210
- registry = pretend .stub (settings = {"warehouse.allowed_domains" : ["pypi.org" ]}),
211
- )
212
-
213
- hashed1 = request ._create_hashed_domains (req1 )
214
- hashed2 = request ._create_hashed_domains (req2 )
215
-
216
- # Should produce same hash despite different case
217
- assert hashed1 == hashed2
63
+ assert hashes [0 ] != hashes [1 ]
218
64
219
65
def test_different_nonce_produces_different_hashes (self ):
220
66
"""Test that different nonces produce different hashes for same domain."""
221
- token_data1 = {
222
- "ts" : int (time .time ()),
223
- "entropy" : base64 .b64encode (b"entropy-1" ).decode ("ascii" ),
224
- "nonce" : "nonce-1" ,
225
- }
226
- integrity_token1 = base64 .b64encode (
227
- json .dumps (token_data1 , sort_keys = True , separators = ("," , ":" )).encode (
228
- "utf-8"
229
- )
230
- ).decode ("ascii" )
231
-
232
- token_data2 = {
233
- "ts" : int (time .time ()),
234
- "entropy" : base64 .b64encode (b"entropy-2" ).decode ("ascii" ),
235
- "nonce" : "nonce-2" ,
236
- }
237
- integrity_token2 = base64 .b64encode (
238
- json .dumps (token_data2 , sort_keys = True , separators = ("," , ":" )).encode (
239
- "utf-8"
240
- )
241
- ).decode ("ascii" )
242
-
243
67
req1 = pretend .stub (
244
68
nonce = "nonce-1" ,
245
- integrity_token = integrity_token1 ,
246
69
registry = pretend .stub (settings = {"warehouse.allowed_domains" : ["pypi.org" ]}),
247
70
)
248
71
req2 = pretend .stub (
249
72
nonce = "nonce-2" ,
250
- integrity_token = integrity_token2 ,
251
73
registry = pretend .stub (settings = {"warehouse.allowed_domains" : ["pypi.org" ]}),
252
74
)
253
75
@@ -256,66 +78,10 @@ def test_different_nonce_produces_different_hashes(self):
256
78
257
79
assert hashed1 != hashed2
258
80
259
- def test_timestamp_affects_hash (self ):
260
- """Test that timestamp changes affect the hash."""
261
- token_data1 = {
262
- "ts" : int (time .time ()),
263
- "entropy" : base64 .b64encode (b"test-entropy" ).decode ("ascii" ),
264
- "nonce" : "test-nonce" ,
265
- }
266
-
267
- token_data2 = {
268
- "ts" : int (time .time ()) + 100 , # 100 seconds later
269
- "entropy" : base64 .b64encode (b"test-entropy" ).decode ("ascii" ),
270
- "nonce" : "test-nonce" ,
271
- }
272
-
273
- integrity_token1 = base64 .b64encode (
274
- json .dumps (token_data1 , sort_keys = True , separators = ("," , ":" )).encode (
275
- "utf-8"
276
- )
277
- ).decode ("ascii" )
278
-
279
- integrity_token2 = base64 .b64encode (
280
- json .dumps (token_data2 , sort_keys = True , separators = ("," , ":" )).encode (
281
- "utf-8"
282
- )
283
- ).decode ("ascii" )
284
-
285
- req1 = pretend .stub (
286
- nonce = "test-nonce" ,
287
- integrity_token = integrity_token1 ,
288
- registry = pretend .stub (settings = {"warehouse.allowed_domains" : ["pypi.org" ]}),
289
- )
290
-
291
- req2 = pretend .stub (
292
- nonce = "test-nonce" ,
293
- integrity_token = integrity_token2 ,
294
- registry = pretend .stub (settings = {"warehouse.allowed_domains" : ["pypi.org" ]}),
295
- )
296
-
297
- hashed1 = request ._create_hashed_domains (req1 )
298
- hashed2 = request ._create_hashed_domains (req2 )
299
-
300
- # Hashes should be different due to different timestamps
301
- assert hashed1 != hashed2
302
-
303
81
def test_empty_domains_returns_empty_string (self ):
304
82
"""Test that empty domain list returns empty string."""
305
- token_data = {
306
- "ts" : int (time .time ()),
307
- "entropy" : base64 .b64encode (b"test-entropy" ).decode ("ascii" ),
308
- "nonce" : "test-nonce" ,
309
- }
310
- integrity_token = base64 .b64encode (
311
- json .dumps (token_data , sort_keys = True , separators = ("," , ":" )).encode (
312
- "utf-8"
313
- )
314
- ).decode ("ascii" )
315
-
316
83
req = pretend .stub (
317
84
nonce = "test-nonce" ,
318
- integrity_token = integrity_token ,
319
85
registry = pretend .stub (settings = {"warehouse.allowed_domains" : []}),
320
86
)
321
87
@@ -324,66 +90,7 @@ def test_empty_domains_returns_empty_string(self):
324
90
325
91
def test_no_domains_setting_returns_empty_string (self ):
326
92
"""Test that missing domains setting returns empty string."""
327
- token_data = {
328
- "ts" : int (time .time ()),
329
- "entropy" : base64 .b64encode (b"test-entropy" ).decode ("ascii" ),
330
- "nonce" : "test-nonce" ,
331
- }
332
- integrity_token = base64 .b64encode (
333
- json .dumps (token_data , sort_keys = True , separators = ("," , ":" )).encode (
334
- "utf-8"
335
- )
336
- ).decode ("ascii" )
337
-
338
- req = pretend .stub (
339
- nonce = "test-nonce" ,
340
- integrity_token = integrity_token ,
341
- registry = pretend .stub (settings = {}),
342
- )
93
+ req = pretend .stub (nonce = "test-nonce" , registry = pretend .stub (settings = {}))
343
94
344
95
hashed = request ._create_hashed_domains (req )
345
96
assert hashed == ""
346
-
347
- def test_checksum_validates_integrity (self ):
348
- """Test that the checksum properly validates the domain hashes."""
349
- token_data = {
350
- "ts" : int (time .time ()),
351
- "entropy" : base64 .b64encode (b"test-entropy-123" ).decode ("ascii" ),
352
- "nonce" : "test-nonce-123" ,
353
- }
354
- integrity_token = base64 .b64encode (
355
- json .dumps (token_data , sort_keys = True , separators = ("," , ":" )).encode (
356
- "utf-8"
357
- )
358
- ).decode ("ascii" )
359
-
360
- req = pretend .stub (
361
- nonce = "test-nonce-123" ,
362
- integrity_token = integrity_token ,
363
- registry = pretend .stub (
364
- settings = {"warehouse.allowed_domains" : ["pypi.org" , "test.pypi.org" ]}
365
- ),
366
- )
367
-
368
- hashed = request ._create_hashed_domains (req )
369
- parts = hashed .split ("|" )
370
-
371
- # Verify checksum matches expected format
372
- checksum = parts [- 1 ]
373
- assert len (checksum ) == 16
374
-
375
- # If we change a hash, the checksum should be different
376
- # Recalculate checksum with modified hash
377
- modified_hashes = parts [:- 1 ]
378
- modified_hashes [0 ] = "0" * 64 # Replace first hash with zeros
379
-
380
- nonce_bytes = b"test-nonce-123"
381
- entropy_bytes = b"test-entropy-123"
382
-
383
- all_hashes = "|" .join (modified_hashes )
384
- new_checksum = hmac .new (
385
- nonce_bytes + entropy_bytes , all_hashes .encode ("utf-8" ), hashlib .sha256
386
- ).hexdigest ()[:16 ]
387
-
388
- # Checksums should be different
389
- assert new_checksum != checksum
0 commit comments