|
30 | 30 | # URL allowed scheme list |
31 | 31 | # Enforces: |
32 | 32 | # - URLs must start with https |
33 | | -URL_ALLOWED_SCHEMES = {"https"} |
| 33 | +URL_ALLOWED_SCHEMES = frozenset({"https"}) |
34 | 34 |
|
35 | 35 |
|
36 | 36 | # URL allowed domain list |
37 | 37 | # Enforces: |
38 | 38 | # - URLs must belong to one of these domains |
39 | | -URL_ALLOWED_NETLOCS = {"github.com", "readthedocs.com", "docs.python.org"} |
| 39 | +URL_ALLOWED_NETLOCS = frozenset({"github.com", "readthedocs.com", "docs.python.org"}) |
| 40 | + |
| 41 | + |
| 42 | +# Maximum allowed URL length |
| 43 | +MAX_URL_LENGTH = 2048 # Common browser limit |
| 44 | +"""Maximum allowed length for URL validation. |
| 45 | +
|
| 46 | +Should be large enough for most URLs but no larger than common browser limits. |
| 47 | +
|
| 48 | +Unit-Testing: |
| 49 | +
|
| 50 | + First set up test fixtures by importing utils. |
| 51 | +
|
| 52 | + >>> import docs.utils as _utils |
| 53 | + >>> |
| 54 | +
|
| 55 | + >>> _utils.MAX_URL_LENGTH is not None |
| 56 | + True |
| 57 | + >>> type(_utils.MAX_URL_LENGTH) is type(int()) |
| 58 | + True |
| 59 | + >>> _utils.MAX_URL_LENGTH > 0 |
| 60 | + True |
| 61 | + >>> _utils.MAX_URL_LENGTH >= 256 |
| 62 | + True |
| 63 | + >>> _utils.MAX_URL_LENGTH <= 2048 |
| 64 | + True |
| 65 | + >>> |
| 66 | +
|
| 67 | +""" |
40 | 68 |
|
41 | 69 |
|
42 | 70 | # Error messages for URL validation |
| 71 | +INVALID_LENGTH_ERROR = f"URL exceeds maximum length of {MAX_URL_LENGTH} characters." |
| 72 | +"""Length error message for URL validation. |
| 73 | +
|
| 74 | +Unit-Testing: |
| 75 | +
|
| 76 | + First set up test fixtures by importing utils. |
| 77 | +
|
| 78 | + >>> import docs.utils as _utils |
| 79 | + >>> |
| 80 | +
|
| 81 | + >>> _utils.INVALID_LENGTH_ERROR is not None |
| 82 | + True |
| 83 | + >>> type(_utils.INVALID_LENGTH_ERROR) is type(str()) |
| 84 | + True |
| 85 | + >>> len(_utils.INVALID_LENGTH_ERROR) > 0 |
| 86 | + True |
| 87 | + >>> |
| 88 | +
|
| 89 | +""" |
| 90 | + |
| 91 | + |
43 | 92 | INVALID_SCHEME_ERROR = "Invalid URL scheme. Only 'https' is allowed." |
44 | 93 | """Scheme error message for URL validation. |
45 | 94 |
|
@@ -216,6 +265,9 @@ def sanitize_url(url: str) -> str: |
216 | 265 | >>> |
217 | 266 |
|
218 | 267 | """ |
| 268 | + # Validate length |
| 269 | + if len(url) > MAX_URL_LENGTH: |
| 270 | + raise ValueError(INVALID_LENGTH_ERROR) |
219 | 271 | parsed_url = urlparse(url) |
220 | 272 | # Validate scheme |
221 | 273 | if parsed_url.scheme not in URL_ALLOWED_SCHEMES: |
|
0 commit comments