Skip to content

Commit 3993da4

Browse files
committed
work in progress commit. Not quite working yet
1 parent 3b5f191 commit 3993da4

File tree

2 files changed

+238
-12
lines changed

2 files changed

+238
-12
lines changed

src/ssvc/namespaces.py

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,21 +36,40 @@
3636
MAX_NS_LENGTH = 1000
3737
NS_LENGTH_INTERVAL = MAX_NS_LENGTH - MIN_NS_LENGTH
3838

39+
40+
# BCP-47 language tag pattern, taken from the CSAF 2.0 specification: https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html
41+
BCP_47_PATTERN = r"(([A-Za-z]{2,3}(-[A-Za-z]{3}(-[A-Za-z]{3}){0,2})?|[A-Za-z]{4,8})(-[A-Za-z]{4})?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-[A-WY-Za-wy-z0-9](-[A-Za-z0-9]{2,8})+)*(-[Xx](-[A-Za-z0-9]{1,8})+)?|[Xx](-[A-Za-z0-9]{1,8})+|[Ii]-[Dd][Ee][Ff][Aa][Uu][Ll][Tt]|[Ii]-[Mm][Ii][Nn][Gg][Oo])"
42+
3943
# Zero-width lookahead: it consumes no characters and only checks the overall length.
# NOTE: this is an f-string, so the regex quantifier braces are doubled ({{ and }})
# in order to come out as literal { and } in the final pattern.
LENGTH_CHECK_PATTERN = rf"(?=.{{{MIN_NS_LENGTH},{MAX_NS_LENGTH}}}$)"
4044
"""Ensures the string is between MIN_NS_LENGTH and MAX_NS_LENGTH characters long."""
4145

42-
PREFIX_CHECK_PATTERN = rf"(x_)?[a-z0-9]{{{MIN_NS_LENGTH}}}"
43-
"""Ensures the string starts with an optional prefix followed by at least 3 alphanumeric characters."""
46+
# Base namespace part (before any extensions) allows . and - with restrictions
47+
# Three fragments concatenated into one regex: a separator guard, the first part, and any further parts.
BASE_PATTERN = (
48+
r"(?!.*[.-]{2,})" # negative lookahead: no two consecutive separators (. or -) anywhere ahead of this point
49+
r"[a-z][a-z0-9]{2,}" # first part: a lowercase letter followed by at least two lowercase alphanumeric characters
50+
r"(?:[.-][a-z0-9]+)*" # further parts: a single . or - separator followed by one or more lowercase alphanumeric characters
51+
)
52+
53+
# Prefix that marks an experimental/private namespace.
X_PFX = "x_"
54+
# Base pattern with the experimental/private prefix required in front.
EXPERIMENTAL_BASE = rf"{X_PFX}{BASE_PATTERN}"
55+
# Either form of base namespace (prefixed or plain), wrapped in one capturing group.
BASE_NS_PATTERN = rf"({EXPERIMENTAL_BASE}|{BASE_PATTERN})"
56+
57+
# Extension segment pattern (alphanumeric + limited punctuation, no consecutive punctuation, ends with alphanumeric)
58+
# Unlike the base pattern, a segment may start with a digit and may contain uppercase letters.
EXT_SEGMENT_PATTERN = (
59+
r"(?!.*[.-]{2,})" # negative lookahead: no two consecutive separators (. or -) anywhere ahead of this point
60+
r"[a-zA-Z0-9]+" # first part: one or more alphanumeric characters (letters of either case, or digits)
61+
r"(?:[.-][a-zA-Z0-9]+)*" # further parts: a single . or - separator followed by one or more alphanumeric characters
62+
)
4463

45-
REMAINDER_CHECK_PATTERN = rf"([/.-]?[a-z0-9]+){{0,{NS_LENGTH_INTERVAL}}}$"
46-
"""Ensures that the string contains only lowercase alphanumeric characters and limited punctuation characters (`/`, `.`, `-`),"""
64+
# Language extension pattern: a slash followed by a BCP-47 tag, or a bare slash.
# The bare slash is the empty first segment; with the slash of the next segment it forms `//`.
65+
LANG_EXT_PATTERN = rf"(/({BCP_47_PATTERN})|/)"
4766

67+
# Subsequent extension segments: zero or more `/segment` groups
68+
SUBSEQUENT_EXT_PATTERN = rf"(/{EXT_SEGMENT_PATTERN})*"
4869

49-
# pattern to match
50-
# NOTE: be careful with this regex. We're using f-strings to insert the min and max lengths, so we need to ensure that
51-
# literal { and } characters are escaped properly (doubled up) so they appear in as single braces in the final regex.
70+
# Complete pattern with length validation
5271
NS_PATTERN = re.compile(
    # Layout: length lookahead, base namespace, then an optional extension
    # part made of the language segment followed by any further segments.
    #
    # The trailing (?<!/) rejects a dangling final slash such as "x_custom/".
    # Without it, the bare-slash alternative of LANG_EXT_PATTERN combined with
    # zero subsequent segments would accept that string, even though a
    # namespace must end with an alphanumeric character. A lookbehind is used
    # (rather than restructuring the groups) so capture-group numbering stays
    # exactly as before.
    rf"^{LENGTH_CHECK_PATTERN}{BASE_NS_PATTERN}({LANG_EXT_PATTERN}{SUBSEQUENT_EXT_PATTERN})?(?<!/)$"
)
5574
f"""The regular expression pattern for validating namespaces.
5675
@@ -59,10 +78,20 @@
5978
Namespace values must
6079
6180
- be {MIN_NS_LENGTH}-{MAX_NS_LENGTH} characters long
62-
- contain only lowercase alphanumeric characters and limited punctuation characters (`/`,`.` and `-`)
63-
- have only one punctuation character in a row
64-
- start with 3 alphanumeric characters after the optional extension prefix
65-
- end with an alphanumeric character
81+
- optionally start with the experimental/private prefix `{X_PFX}`
82+
- after the optional experimental/private prefix, they must:
83+
- start with a letter
84+
- contain at least 3 alphanumeric characters (longer is permitted)
85+
- contain only lowercase alphanumeric characters and limited punctuation characters (`.`, `-`)
86+
- extensions are supported and optional, and are delineated by slashes (`/`)
87+
- more than one extension segment is allowed, however:
88+
- the first extension segment, if present, is reserved for a BCP-47 language tag, otherwise it must be empty
89+
- if no BCP-47 tag is present, the first extension segment must be empty (i.e., `//`)
90+
- double slashes (`//`) are *only* permitted in the *first segment* to indicate no BCP-47 tag
91+
- beyond the first extension segment, subsequent segments must:
92+
- contain only alphanumeric characters and limited punctuation characters (`.`, `-`)
93+
- have only one punctuation character in a row (no double dashes or dots)
94+
- end with an alphanumeric character
6695
6796
"""
6897

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
# Copyright (c) 2025 Carnegie Mellon University.
2+
# NO WARRANTY. THIS CARNEGIE MELLON UNIVERSITY AND SOFTWARE
3+
# ENGINEERING INSTITUTE MATERIAL IS FURNISHED ON AN "AS-IS" BASIS.
4+
# CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND,
5+
# EITHER EXPRESSED OR IMPLIED, AS TO ANY MATTER INCLUDING, BUT
6+
# NOT LIMITED TO, WARRANTY OF FITNESS FOR PURPOSE OR
7+
# MERCHANTABILITY, EXCLUSIVITY, OR RESULTS OBTAINED FROM USE
8+
# OF THE MATERIAL. CARNEGIE MELLON UNIVERSITY DOES NOT MAKE
9+
# ANY WARRANTY OF ANY KIND WITH RESPECT TO FREEDOM FROM
10+
# PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
11+
# Licensed under a MIT (SEI)-style license, please see LICENSE or contact
12+
# permission@sei.cmu.edu for full terms.
13+
# [DISTRIBUTION STATEMENT A] This material has been approved for
14+
# public release and unlimited distribution. Please see Copyright notice
15+
# for non-US Government use and distribution.
16+
# This Software includes and/or makes use of Third-Party Software each
17+
# subject to its own license.
18+
# DM24-0278
19+
20+
import logging
import re
import unittest

from ssvc.namespaces import (
    BASE_NS_PATTERN,
    BASE_PATTERN,
    EXPERIMENTAL_BASE,
    LENGTH_CHECK_PATTERN,
    MAX_NS_LENGTH,
    MIN_NS_LENGTH,
    NS_PATTERN,
)
32+
33+
logger = logging.getLogger(__name__)
34+
35+
36+
class TestNamespacePattern(unittest.TestCase):
    """Exercise the regular-expression building blocks used for namespaces.

    Each pattern test passes a regex plus two lists (strings that must match
    and strings that must not) to `_test_successes_failures`, which collects
    every mismatch before asserting so a single run reports all of them.
    """

    def setUp(self):
        """Build the accept/reject fixtures consumed by `test_ns_pattern`."""
        self.expect_success = [
            "ssvc",
            "cisa",
            "custom",  # not in enum, but valid for the pattern
            "x_private-test",  # valid namespace with dash
            "x_custom",  # valid namespace with x_ prefix
            "x_custom.with.dots",  # valid namespace with x_ prefix and dots
            "abc",  # not in enum, but valid for the pattern
            "x_abc",  # valid namespace with x_ prefix
            "x_custom//extension",  # double slash is okay when it's the first segment
            "ssvc/de-DE/reference-arch-1",  # valid BCP-47 tag with dashes
            "x_test/pl-PL/foo/bar/baz/quux",  # valid BCP-47 tag and multiple segments
        ]
        self.expect_fail = [
            "999",  # invalid namespace, numeric only
            "99xx",  # invalid namespace, numeric prefix
            "x__invalid",  # invalid namespace, double underscore
            "x_-invalid",  # invalid namespace, dash after x_
            "x_.invalid",  # invalid namespace, dot after x_
            "x_/foo",  # invalid namespace, slash after x_, invalid BCP-47 tag
            "x_//foo",  # invalid namespace, double slash after x_
            "x_abc/invalid-bcp-47",  # not a valid BCP-47 tag
            "abc/invalid-bcp-47",  # not in enum (but that's ok for the pattern), not a valid BCP-47 tag
            # NOTE(review): "invalid" is seven letters, which satisfies the
            # 4-8 letter language alternative of the BCP-47 grammar, so a
            # purely syntactic check may accept this entry - confirm whether
            # it really belongs in the reject list.
            "abc/invalid",  # not in enum (but that's ok for the pattern), not a valid BCP-47 tag
            "x_custom/extension",  # not a valid BCP-47 tag
            "x_test/not-bcp-47",  # not a valid BCP-47 tag
            "x_custom/extension/with/multiple/segments/"
            + "a" * 990,  # exceeds max length
            "x_custom.extension.",  # ends with punctuation
            "x_custom..extension",  # double dot
            "x_custom/",  # ends with slash
            "x_custom/extension//",  # double slash at end
            "x_custom/extension/with//double/slash",  # double slash in middle
            "x_custom/extension/with..double.dot",  # double dot in middle
            "x_custom/extension/with--double-dash",  # double dash in middle
            "ab",  # too short
            "x_",  # too short after prefix
        ]

    def test_ns_pattern(self):
        """The complete namespace pattern accepts and rejects the setUp fixtures."""
        self._test_successes_failures(
            NS_PATTERN.pattern, self.expect_fail, self.expect_success
        )

    def test_base_pattern(self):
        """The un-prefixed base pattern: letter first, >= 3 chars, single . or - separators."""
        x_success = [
            "abc",
            "contains.dot",
            "contains-dash",
            "contains-dash-and.dot",
        ]
        x_fail = [
            "a",  # too short
            "ab",  # too short
            "9abc",  # starts with a number
            "x_foo",  # no x_ in base pattern
            "contains..double.dot",  # double dot
            "contains--double-dash",  # double dash
            "contains_underscore",  # underscore not allowed
            "contains/slash",  # slash not allowed
            ".starts.with.dot",  # starts with a dot
            "-starts-with-dash",  # starts with a dash
            "/starts-with-slash",  # starts with a slash
            "_starts-with-underscore",  # starts with an underscore
            "ends-with-dot.",  # ends with a dot
            "ends-with-dash-",  # ends with a dash
            "ends-with-slash/",  # ends with a slash
        ]
        self._test_successes_failures(BASE_PATTERN, x_fail, x_success)

    def test_experimental_base_pattern(self):
        """The experimental pattern requires the x_ prefix in front of a valid base."""
        x_success = [
            "x_abc",
            "x_custom",
            "x_custom.with.dots",  # dots are allowed in the base pattern
            "x_custom-with-dashes",  # dashes are allowed in the base pattern
        ]
        x_fail = [
            "abc",  # valid base, but missing the x_ prefix
            "9abc",  # does not start with x_
            "x__invalid",  # double underscore
            "x_-invalid",  # dash after x_
            "x_.invalid",  # dot after x_
            "x_9abc",  # starts with a number after x_
            "x_abc.",  # ends with a dot
            "x_abc-",  # ends with a dash
            "x_abc/",  # ends with a slash
            "x_/foo",  # slashes aren't part of the base pattern
        ]
        # Test the experimental-only pattern here; the combined pattern is
        # covered separately by test_base_ns_pattern.
        self._test_successes_failures(EXPERIMENTAL_BASE, x_fail, x_success)

    def test_base_ns_pattern(self):
        """The combined base pattern accepts both prefixed and plain base namespaces."""
        x_success = [
            "abc",
            "x_abc",
            "x_custom",
            "x_custom.with.dots",  # dots are allowed in the base pattern
            "x_custom-with-dashes",  # dashes are allowed in the base pattern
        ]
        x_fail = [
            "9abc",  # starts with a number
            "x__invalid",  # double underscore
            "x_-invalid",  # dash after x_
            "x_.invalid",  # dot after x_
            "x_9abc",  # starts with a number after x_
            "x_abc.",  # ends with a dot
            "x_abc-",  # ends with a dash
            "x_abc/",  # ends with a slash
            "x_/foo",  # slashes aren't part of the base pattern
        ]
        self._test_successes_failures(BASE_NS_PATTERN, x_fail, x_success)

    def _test_successes_failures(
        self, pattern: str, x_fail: list[str], x_success: list[str]
    ):
        """Assert that `pattern` matches all of `x_success` and none of `x_fail`.

        Args:
            pattern: Regular expression source; it may or may not carry its
                own ``^``/``$`` anchors.
            x_fail: Strings the pattern must reject.
            x_success: Strings the pattern must accept in full.

        All mismatches are collected first so that one failing run lists
        every offending string instead of stopping at the first.
        """
        # fullmatch anchors both ends regardless of how the pattern is
        # written: it stays correct for a top-level alternation (where
        # prepending "^" and appending "$" would only anchor the outer
        # branches) and, unlike match() with "$", it does not tolerate a
        # trailing newline.
        compiled = re.compile(pattern)

        successes = []
        failures = []
        for candidates, should_match in ((x_success, True), (x_fail, False)):
            for ns in candidates:
                if should_match:
                    expected = f"Should match {ns}"
                else:
                    expected = f"Should not match {ns}"
                matched = compiled.fullmatch(ns) is not None
                if matched == should_match:
                    successes.append(expected)
                else:
                    failures.append(expected)

        logger.debug("Successes: %s", successes)
        self.assertFalse(failures, f"Pattern expectations not met: {failures}")

    def test_length_check_pattern(self):
        """
        Test the length check pattern for namespaces.
        The pattern should enforce a minimum and maximum length.
        """
        # Every length below the minimum (including the empty string) is rejected.
        for length in range(MIN_NS_LENGTH):
            ns = "a" * length
            self.assertIsNone(
                re.match(LENGTH_CHECK_PATTERN, ns), f"Should not match: {ns}"
            )

        # Both bounds are inclusive, so the exact minimum and maximum are accepted.
        for length in (MIN_NS_LENGTH, MAX_NS_LENGTH):
            self.assertIsNotNone(
                re.match(LENGTH_CHECK_PATTERN, "a" * length),
                f"Should match a string of length {length}",
            )

        # One character past the maximum is rejected.
        self.assertIsNone(
            re.match(LENGTH_CHECK_PATTERN, "a" * (MAX_NS_LENGTH + 1)),
            f"Should not match a string of length {MAX_NS_LENGTH + 1}",
        )
194+
195+
196+
if __name__ == "__main__":
197+
unittest.main()

0 commit comments

Comments
 (0)