|
29 | 29 |
|
30 | 30 | from pydantic import Field |
31 | 31 |
|
32 | | -X_PFX = "x_" |
33 | | -"""The prefix for extension namespaces. Extension namespaces must start with this prefix.""" |
34 | | - |
35 | 32 | MIN_NS_LENGTH = 3 |
36 | 33 | MAX_NS_LENGTH = 1000 |
37 | 34 | NS_LENGTH_INTERVAL = MAX_NS_LENGTH - MIN_NS_LENGTH |
38 | 35 |
|
39 | | - |
40 | 36 | # from https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html |
41 | 37 | BCP_47_PATTERN = r"(([A-Za-z]{2,3}(-[A-Za-z]{3}(-[A-Za-z]{3}){0,2})?|[A-Za-z]{4,8})(-[A-Za-z]{4})?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-[A-WY-Za-wy-z0-9](-[A-Za-z0-9]{2,8})+)*(-[Xx](-[A-Za-z0-9]{1,8})+)?|[Xx](-[A-Za-z0-9]{1,8})+|[Ii]-[Dd][Ee][Ff][Aa][Uu][Ll][Tt]|[Ii]-[Mm][Ii][Nn][Gg][Oo])" |
| 38 | +"""A regular expression pattern for BCP-47 language tags.""" |
42 | 39 |
|
43 | 40 | LENGTH_CHECK_PATTERN = rf"(?=.{{{MIN_NS_LENGTH},{MAX_NS_LENGTH}}}$)" |
44 | 41 | """Ensures the string is between MIN_NS_LENGTH and MAX_NS_LENGTH characters long.""" |
45 | 42 |
|
46 | 43 | # Base namespace part (before any extensions) allows . and - with restrictions |
47 | 44 | BASE_PATTERN = ( |
48 | 45 | r"(?!.*[.-]{2,})" # no consecutive separators |
49 | | - r"[a-z][a-z0-9]{2,}" # first part starts with a letter, followed by one or more alphanumeric characters |
| 46 | + r"[a-z][a-z0-9]{2,}" # first part starts with a letter, followed by two or more alphanumeric characters (three or more in total) |
50 | 47 | r"(?:[.-][a-z0-9]+)*" # remaining parts can have alphanumeric characters and single . or - separators |
51 | 48 | ) |
| 49 | +"""The base pattern for namespaces, which must start with a letter and contain at least 3 alphanumeric characters.""" |
52 | 50 |
|
53 | 51 | X_PFX = "x_" |
| 52 | +"""The prefix for extension namespaces. Extension namespaces must start with this prefix.""" |
| 53 | + |
54 | 54 | EXPERIMENTAL_BASE = rf"{X_PFX}{BASE_PATTERN}" |
| 55 | +f"""The base pattern for experimental namespaces, which must start with the {X_PFX} prefix, |
| 56 | +followed by a string matching the base pattern.""" |
| 57 | + |
55 | 58 | BASE_NS_PATTERN = rf"({EXPERIMENTAL_BASE}|{BASE_PATTERN})" |
| 59 | +"""The complete base namespace pattern, which allows for experimental namespaces.""" |
56 | 60 |
|
57 | 61 | # Extension segment pattern (alphanumeric + limited punctuation, no consecutive punctuation, ends with alphanumeric) |
58 | 62 | EXT_SEGMENT_PATTERN = ( |
59 | 63 | r"(?!.*[.-]{2,})" # no consecutive separators |
60 | | - r"[a-zA-Z0-9]+" # first part starts with a letter, followed by one or more alphanumeric characters |
| 64 | + r"[a-zA-Z][a-zA-Z0-9]*" # first part starts with a letter, followed by zero or more alphanumeric characters |
61 | 65 | r"(?:[.-][a-zA-Z0-9]+)*" # remaining parts can have alphanumeric characters and single . or - separators |
62 | 66 | ) |
| 67 | +"""The pattern for extension segments in namespaces, which must start with a letter and contain alphanumeric characters or |
| 68 | +limited punctuation characters (., -), with no consecutive punctuation characters allowed.""" |
63 | 69 |
|
64 | 70 | # Language extension pattern (BCP-47 or empty for //) |
65 | | -LANG_EXT_PATTERN = rf"(/({BCP_47_PATTERN})|/)" |
| 71 | +LANG_EXT_PATTERN = rf"(/({BCP_47_PATTERN})/|//)" |
| 72 | +"""The pattern for the first extension segment, which must be either a valid BCP-47 tag or empty (//).""" |
66 | 73 |
|
67 | 74 | # Subsequent extension segments |
68 | | -SUBSEQUENT_EXT_PATTERN = rf"(/{EXT_SEGMENT_PATTERN})*" |
| 75 | +SUBSEQUENT_EXT_PATTERN = rf"{EXT_SEGMENT_PATTERN}(?:/{EXT_SEGMENT_PATTERN})*" |
| 76 | +"""The pattern for subsequent extension segments, which must follow the rules for extension segments, delimited by slashes (/).""" |
69 | 77 |
|
70 | 78 | # Complete pattern with length validation |
71 | 79 | NS_PATTERN = re.compile( |
72 | | - rf"^{LENGTH_CHECK_PATTERN}{BASE_NS_PATTERN}({LANG_EXT_PATTERN}{SUBSEQUENT_EXT_PATTERN})?$" |
| 80 | + rf"^{LENGTH_CHECK_PATTERN}({BASE_NS_PATTERN})({LANG_EXT_PATTERN}{SUBSEQUENT_EXT_PATTERN})?$" |
73 | 81 | ) |
74 | | -f"""The regular expression pattern for validating namespaces. |
| 82 | +f"""The full regular expression pattern for validating namespaces. |
| 83 | +
|
| 84 | +!!! note "Length Requirements" |
75 | 85 |
|
76 | | -!!! note "Namespace Validation Rules" |
| 86 | + - Namespaces must be between {MIN_NS_LENGTH} and {MAX_NS_LENGTH} characters long. |
77 | 87 |
|
78 | | - Namespace values must |
| 88 | +!!! note "Base Namespace Requirements" |
| 89 | + |
| 90 | + - Must start with a lowercase letter |
| 91 | + - Must begin with at least 3 alphanumeric characters before any dot or hyphen in the base part (after the optional experimental/private prefix) |
| 92 | + - Must contain only lowercase letters, numbers, dots (`.`), and hyphens (`-`) |
| 93 | + - Must not contain consecutive dots or hyphens (no `..`, `--`, `.-`, `-.`, `---`, etc.) |
| 94 | + - May optionally start with the experimental/private prefix `{X_PFX}`. |
| 95 | + |
| 96 | +!!! note "Extension Requirements (Optional)" |
79 | 97 | |
80 | | - - be {MIN_NS_LENGTH}-{MAX_NS_LENGTH} characters long |
81 | | - - optionally start with the experimental/private prefix `{X_PFX}` |
82 | | - - after the optional experimental/private prefix, they must: |
83 | | - - start with a letter |
84 | | - - contain at least 3 alphanumeric characters (longer is permitted) |
85 | | - - contain only lowercase alphanumeric characters and limited punctuation characters (`.`, `-`) |
86 | | - - extensions are supported and optional, and are delineated by slashes (`/`) |
87 | | - - more than one extension segment is allowed, however: |
88 | | - - the first extension segment, if present, is reserved for a BCP-47 language tag, otherwise it must be empty |
89 | | - - if no BCP-47 tag is present, the first extension segment must be empty (i.e., `//`) |
90 | | - - double slashes (`//`) are *only* permitted in the *first segment* to indicate no BCP-47 tag |
91 | | - - beyond the first extension segment, subsequent segments must: |
92 | | - - contain only alphanumeric characters and limited punctuation characters (`.`, `-`) |
93 | | - - have only one punctuation character in a row (no double dashes or dots) |
94 | | - - end with an alphanumeric character |
| 98 | + - Extensions are optional |
| 99 | + - Extensions must be delineated by slashes (`/`) |
| 100 | + - If any extension segments are present, the following rules apply: |
| 101 | + - The first extension segment must be a valid BCP-47 language tag or empty (i.e., `//`). |
| 102 | + - Subsequent extension segments: |
| 103 | + - must start with a letter (upper or lowercase) |
| 104 | + - may contain letters, numbers, dots (`.`), and hyphens (`-`) |
| 105 | + - must not start or end with a dot or hyphen |
| 106 | + - must not contain consecutive dots or hyphens (no `..`, `--`, `.-`, `-.`, `---`, etc.) |
| 107 | + - are separated by single forward slashes (`/`) |
| 108 | + - multiple extension segments are allowed |
95 | 109 | |
96 | 110 | """ |
97 | 111 |
|
|
0 commit comments