Skip to content

Commit 93f5ea2

Browse files
authored
refactor: have a single function for normalized PyPI package names (#1329)
Before this PR there were at least 2 places where such a helper function existed and it made it very easy to make another copy. This PR introduces a hardened version, that follows conventions from upstream PyPI and tests have been added. Split from #1294, work towards #1262.
1 parent 5c5ab5b commit 93f5ea2

File tree

6 files changed

+124
-26
lines changed

6 files changed

+124
-26
lines changed

python/extensions/pip.bzl

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ load(
2626
"whl_library",
2727
)
2828
load("@rules_python//python/pip_install:requirements_parser.bzl", parse_requirements = "parse")
29+
load("//python/private:normalize_name.bzl", "normalize_name")
2930

3031
def _whl_mods_impl(mctx):
3132
"""Implementation of the pip.whl_mods tag class.
@@ -130,7 +131,7 @@ def _create_versioned_pip_and_whl_repos(module_ctx, pip_attr, whl_map):
130131
# would need to guess what name we modified the whl name
131132
# to.
132133
annotation = whl_modifications.get(whl_name)
133-
whl_name = _sanitize_name(whl_name)
134+
whl_name = normalize_name(whl_name)
134135
whl_library(
135136
name = "%s_%s" % (pip_name, whl_name),
136137
requirement = requirement_line,
@@ -318,10 +319,6 @@ def _pip_impl(module_ctx):
318319
whl_library_alias_names = whl_map.keys(),
319320
)
320321

321-
# Keep in sync with python/pip_install/tools/bazel.py
322-
def _sanitize_name(name):
323-
return name.replace("-", "_").replace(".", "_").lower()
324-
325322
def _pip_parse_ext_attrs():
326323
attrs = dict({
327324
"hub_name": attr.string(

python/pip_install/pip_repository.bzl

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ load("//python/pip_install:repositories.bzl", "all_requirements")
2020
load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse")
2121
load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS")
2222
load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED")
23+
load("//python/private:normalize_name.bzl", "normalize_name")
2324
load("//python/private:toolchains_repo.bzl", "get_host_os_arch")
2425

2526
CPPFLAGS = "CPPFLAGS"
@@ -267,10 +268,6 @@ A requirements_lock attribute must be specified, or a platform-specific lockfile
267268
""")
268269
return requirements_txt
269270

270-
# Keep in sync with `_clean_pkg_name` in generated bzlmod requirements.bzl
271-
def _clean_pkg_name(name):
272-
return name.replace("-", "_").replace(".", "_").lower()
273-
274271
def _pkg_aliases(rctx, repo_name, bzl_packages):
275272
"""Create alias declarations for each python dependency.
276273
@@ -376,7 +373,7 @@ def _pip_repository_bzlmod_impl(rctx):
376373
content = rctx.read(requirements_txt)
377374
parsed_requirements_txt = parse_requirements(content)
378375

379-
packages = [(_clean_pkg_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]
376+
packages = [(normalize_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]
380377

381378
bzl_packages = sorted([name for name, _ in packages])
382379
_create_pip_repository_bzlmod(rctx, bzl_packages, str(requirements_txt))
@@ -422,7 +419,7 @@ def _pip_repository_impl(rctx):
422419
content = rctx.read(requirements_txt)
423420
parsed_requirements_txt = parse_requirements(content)
424421

425-
packages = [(_clean_pkg_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]
422+
packages = [(normalize_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]
426423

427424
bzl_packages = sorted([name for name, _ in packages])
428425

@@ -432,7 +429,7 @@ def _pip_repository_impl(rctx):
432429

433430
annotations = {}
434431
for pkg, annotation in rctx.attr.annotations.items():
435-
filename = "{}.annotation.json".format(_clean_pkg_name(pkg))
432+
filename = "{}.annotation.json".format(normalize_name(pkg))
436433
rctx.file(filename, json.encode_indent(json.decode(annotation)))
437434
annotations[pkg] = "@{name}//:{filename}".format(name = rctx.attr.name, filename = filename)
438435

python/pip_install/tools/lib/bazel.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import re
16+
1517
WHEEL_FILE_LABEL = "whl"
1618
PY_LIBRARY_LABEL = "pkg"
1719
DATA_LABEL = "data"
@@ -22,21 +24,9 @@
2224
def sanitise_name(name: str, prefix: str) -> str:
2325
"""Sanitises the name to be compatible with Bazel labels.
2426
25-
There are certain requirements around Bazel labels that we need to consider. From the Bazel docs:
26-
27-
Package names must be composed entirely of characters drawn from the set A-Z, a–z, 0–9, '/', '-', '.', and '_',
28-
and cannot start with a slash.
29-
30-
Due to restrictions on Bazel labels we also cannot allow hyphens. See
31-
https://github.com/bazelbuild/bazel/issues/6841
32-
33-
Further, rules-python automatically adds the repository root to the PYTHONPATH, meaning a package that has the same
34-
name as a module is picked up. We workaround this by prefixing with `pypi__`. Alternatively we could require
35-
`--noexperimental_python_import_all_repositories` be set, however this breaks rules_docker.
36-
See: https://github.com/bazelbuild/bazel/issues/2636
27+
See the doc in ../../../private/normalize_name.bzl.
3728
"""
38-
39-
return prefix + name.replace("-", "_").replace(".", "_").lower()
29+
return prefix + re.sub(r"[-_.]+", "_", name).lower()
4030

4131

4232
def _whl_name_to_repo_root(whl_name: str, repo_prefix: str) -> str:

python/private/normalize_name.bzl

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright 2023 The Bazel Authors. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Normalize a PyPI package name to allow consistent label names
17+
18+
Note we chose `_` instead of `-` as a separator as there are certain
19+
requirements around Bazel labels that we need to consider.
20+
21+
From the Bazel docs:
22+
> Package names must be composed entirely of characters drawn from the set
23+
> A-Z, a–z, 0–9, '/', '-', '.', and '_', and cannot start with a slash.
24+
25+
However, due to restrictions on Bazel labels we also cannot allow hyphens.
26+
See https://github.com/bazelbuild/bazel/issues/6841
27+
28+
Further, rules_python automatically adds the repository root to the
29+
PYTHONPATH, meaning a package that has the same name as a module is picked
30+
up. We workaround this by prefixing with `<hub_name>_`.
31+
32+
Alternatively we could require
33+
`--noexperimental_python_import_all_repositories` be set, however this
34+
breaks rules_docker.
35+
See: https://github.com/bazelbuild/bazel/issues/2636
36+
37+
Also see Python spec on normalizing package names:
38+
https://packaging.python.org/en/latest/specifications/name-normalization/
39+
"""
40+
41+
# Keep in sync with ../pip_install/tools/lib/bazel.py
42+
def normalize_name(name):
43+
"""normalize a PyPI package name and return a valid bazel label.
44+
45+
Args:
46+
name: str, the PyPI package name.
47+
48+
Returns:
49+
a normalized name as a string.
50+
"""
51+
name = name.replace("-", "_").replace(".", "_").lower()
52+
if "__" not in name:
53+
return name
54+
55+
# Handle the edge-case where there are consecutive `-`, `_` or `.` characters,
56+
# which is a valid Python package name.
57+
return "_".join([
58+
part
59+
for part in name.split("_")
60+
if part
61+
])
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
load(":normalize_name_tests.bzl", "normalize_name_test_suite")
2+
3+
normalize_name_test_suite(name = "normalize_name_tests")
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Copyright 2023 The Bazel Authors. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
""
16+
17+
load("@rules_testing//lib:test_suite.bzl", "test_suite")
18+
load("//python/private:normalize_name.bzl", "normalize_name") # buildifier: disable=bzl-visibility
19+
20+
_tests = []
21+
22+
def _test_name_normalization(env):
23+
want = {
24+
input: "friendly_bard"
25+
for input in [
26+
"friendly-bard",
27+
"Friendly-Bard",
28+
"FRIENDLY-BARD",
29+
"friendly.bard",
30+
"friendly_bard",
31+
"friendly--bard",
32+
"FrIeNdLy-._.-bArD",
33+
]
34+
}
35+
36+
actual = {
37+
input: normalize_name(input)
38+
for input in want.keys()
39+
}
40+
env.expect.that_dict(actual).contains_exactly(want)
41+
42+
_tests.append(_test_name_normalization)
43+
44+
def normalize_name_test_suite(name):
45+
"""Create the test suite.
46+
47+
Args:
48+
name: the name of the test suite
49+
"""
50+
test_suite(name = name, basic_tests = _tests)

0 commit comments

Comments
 (0)