Skip to content

Commit 3803ce3

Browse files
authored
perf: translate for canonicalize_name (#1030)
* perf: use translate Signed-off-by: Henry Schreiner <[email protected]> perf: use translate and replace Signed-off-by: Henry Schreiner <[email protected]> perf: also translate letters Signed-off-by: Henry Schreiner <[email protected]> * Update src/packaging/utils.py --------- Signed-off-by: Henry Schreiner <[email protected]>
1 parent 52e57f2 commit 3803ce3

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

src/packaging/utils.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ class InvalidSdistFilename(ValueError):
3434

3535
# Core metadata spec for `Name`
3636
_validate_regex = re.compile(r"[A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9]", re.IGNORECASE)
37-
_canonicalize_regex = re.compile(r"[-_.]+")
37+
_letters_translate = {c: c.lower() for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"}
38+
_canonicalize_table = str.maketrans({"_": "-", ".": "-", **_letters_translate})
3839
_normalized_regex = re.compile(r"[a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9]")
3940
# PEP 427: The build number must start with a digit.
4041
_build_tag_regex = re.compile(r"(\d+)(.*)")
@@ -43,8 +44,13 @@ class InvalidSdistFilename(ValueError):
4344
def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName:
4445
if validate and not _validate_regex.fullmatch(name):
4546
raise InvalidName(f"name is invalid: {name!r}")
46-
# This is taken from PEP 503.
47-
value = _canonicalize_regex.sub("-", name).lower()
47+
# Ensure all ``.`` and ``_`` are ``-``
48+
# Emulates ``re.sub(r"[-_.]+", "-", name).lower()`` from PEP 503
49+
# About 2x faster; safe for valid names, which contain only ASCII letters, digits, ``.``, ``_`` and ``-`` (any other character is left unchanged, unlike ``str.lower()``)
50+
value = name.translate(_canonicalize_table)
51+
# Condense repeats (faster than regex)
52+
while "--" in value:
53+
value = value.replace("--", "-")
4854
return cast("NormalizedName", value)
4955

5056

0 commit comments

Comments
 (0)