Skip to content

Commit 93ee7f1

Browse files
woodruffw and di authored
metadata: initial PEP 753 bits (#17470)
* metadata: initial PEP 753 bits
  Signed-off-by: William Woodruff <[email protected]>
* fix lint
  Signed-off-by: William Woodruff <[email protected]>
* update comment
  Signed-off-by: William Woodruff <[email protected]>
* use a shorter URL
  Signed-off-by: William Woodruff <[email protected]>
---------
Signed-off-by: William Woodruff <[email protected]>
Co-authored-by: Dustin Ingram <[email protected]>
1 parent a68ad6d commit 93ee7f1

File tree

3 files changed

+75
-44
lines changed

3 files changed

+75
-44
lines changed

tests/unit/forklift/test_metadata.py

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import pytest
1515

1616
from packaging.version import Version
17-
from sqlalchemy.dialects.postgresql import ENUM
1817
from webob.multidict import MultiDict
1918

2019
from warehouse.forklift import metadata
@@ -91,7 +90,7 @@ def test_length_is_limited(self, field_name, length):
9190
metadata_version="2.1",
9291
name="spam",
9392
version="2.0",
94-
**{field_name: "a" * (length - 1)}
93+
**{field_name: "a" * (length - 1)},
9594
)
9695
meta = metadata.parse(None, form_data=data)
9796
assert getattr(meta, field_name) == "a" * (length - 1)
@@ -101,7 +100,7 @@ def test_length_is_limited(self, field_name, length):
101100
metadata_version="2.1",
102101
name="spam",
103102
version="2.0",
104-
**{field_name: "a" * (length + 1)}
103+
**{field_name: "a" * (length + 1)},
105104
)
106105
with pytest.raises(ExceptionGroup) as excinfo:
107106
metadata.parse(None, form_data=data)
@@ -113,7 +112,7 @@ def test_valid_emails(self, field_name):
113112
metadata_version="2.1",
114113
name="spam",
115114
version="2.0",
116-
**{field_name: "[email protected]"}
115+
**{field_name: "[email protected]"},
117116
)
118117
meta = metadata.parse(None, form_data=data)
119118
assert getattr(meta, field_name) == "[email protected]"
@@ -124,7 +123,7 @@ def test_invalid_emails(self, field_name):
124123
metadata_version="2.1",
125124
name="spam",
126125
version="2.0",
127-
**{field_name: "Foo <test>"}
126+
**{field_name: "Foo <test>"},
128127
)
129128
with pytest.raises(ExceptionGroup) as excinfo:
130129
metadata.parse(None, form_data=data)
@@ -237,7 +236,7 @@ def test_valid_dists(self, field_name):
237236
metadata_version="2.1",
238237
name="spam",
239238
version="2.0",
240-
**{field_name: "foo>=1.0"}
239+
**{field_name: "foo>=1.0"},
241240
)
242241
meta = metadata.parse(None, form_data=data)
243242
assert [str(r) for r in getattr(meta, field_name)] == ["foo>=1.0"]
@@ -252,7 +251,7 @@ def test_invalid_dists(self, field_name):
252251
metadata_version="2.1",
253252
name="spam",
254253
version="2.0",
255-
**{field_name: "foo >= dog"}
254+
**{field_name: "foo >= dog"},
256255
)
257256
with pytest.raises(
258257
(
@@ -269,7 +268,7 @@ def test_invalid_dists(self, field_name):
269268
metadata_version="2.1",
270269
name="spam",
271270
version="2.0",
272-
**{field_name: "foo @ https://example.com/foo-1.0.tar.gz"}
271+
**{field_name: "foo @ https://example.com/foo-1.0.tar.gz"},
273272
)
274273
with pytest.raises(ExceptionGroup) as excinfo:
275274
metadata.parse(None, form_data=data)
@@ -295,7 +294,7 @@ def test_valid_dynamic_but_missing_from_our_enum(self, monkeypatch):
295294
considers to be valid, but don't exist in our enum and would otherwise fail
296295
when inserting them into the database
297296
"""
298-
monkeypatch.setattr(metadata, "DynamicFieldsEnum", ENUM())
297+
monkeypatch.setattr(metadata, "DYNAMIC_FIELDS", [])
299298
data = MultiDict(metadata_version="2.2", name="spam", version="2.0")
300299
data.add("dynamic", "author")
301300
with pytest.raises(ExceptionGroup) as excinfo:
@@ -353,3 +352,19 @@ def test_empty_strings_are_ignored(self):
353352

354353
meta = metadata.parse_form_metadata(data)
355354
assert meta.description_content_type is None
355+
356+
357+
@pytest.mark.parametrize(
    ("label", "expected"),
    [
        ("Home-page", "homepage"),
        ("homepage", "homepage"),
        ("Home Page", "homepage"),
        ("HomePage", "homepage"),
        ("HOMEPAGE", "homepage"),
        ("What's New", "whatsnew"),
        ("Change_Log", "changelog"),
        # Edge cases: nothing to strip, and non-space ASCII whitespace.
        ("", ""),
        (" Download\tURL ", "downloadurl"),
    ],
)
def test_normalize_project_url_label(label, expected):
    """
    Labels that differ only in case, ASCII punctuation, or whitespace must
    normalize to the same string.
    """
    assert metadata.normalize_project_url_label(label) == expected

warehouse/forklift/metadata.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
import email.message
1414
import email.utils
15+
import string
1516
import typing
1617

1718
import email_validator
@@ -30,11 +31,42 @@
3031
from trove_classifiers import all_classifiers, deprecated_classifiers
3132
from webob.multidict import MultiDict
3233

33-
from warehouse.packaging.models import DynamicFieldsEnum
3434
from warehouse.utils import http
3535

3636
SUPPORTED_METADATA_VERSIONS = {"1.0", "1.1", "1.2", "2.1", "2.2", "2.3", "2.4"}
3737

38+
# Names of the core metadata fields that may be listed in the `Dynamic`
# field. `_validate_metadata` compares against the lowercased form of each
# entry, and `warehouse.packaging.models.DynamicFieldsEnum` is built by
# unpacking this list, so the spelling and order here are significant.
DYNAMIC_FIELDS = [
    "Platform",
    "Supported-Platform",
    "Summary",
    "Description",
    "Description-Content-Type",
    "Keywords",
    "Home-Page",  # Deprecated, but technically permitted by PEP 643
    "Download-Url",  # Deprecated, but technically permitted by PEP 643
    "Author",
    "Author-Email",
    "Maintainer",
    "Maintainer-Email",
    "License",
    "License-Expression",
    "License-File",
    "Classifier",
    "Requires-Dist",
    "Requires-Python",
    "Requires-External",
    "Project-Url",
    "Provides-Extra",
    "Provides-Dist",
    "Obsoletes-Dist",
    # Although the following are deprecated fields, they are technically
    # permitted as dynamic by PEP 643
    # https://github.com/pypa/setuptools/issues/4797#issuecomment-2589514950
    "Requires",
    "Provides",
    "Obsoletes",
]
69+
3870
# Mapping of fields on a Metadata instance to any limits on the length of that
3971
# field. Fields without a limit will naturally be unlimited in length.
4072
_LENGTH_LIMITS = {
@@ -241,7 +273,7 @@ def _validate_metadata(metadata: Metadata, *, backfill: bool = False):
241273
for field in {"dynamic"}:
242274
if (value := getattr(metadata, field)) is not None:
243275
for key in value:
244-
if key not in map(str.lower, DynamicFieldsEnum.enums):
276+
if key not in map(str.lower, DYNAMIC_FIELDS):
245277
errors.append(
246278
InvalidMetadata(
247279
_RAW_TO_EMAIL_MAPPING.get(field, field),
@@ -362,3 +394,12 @@ def parse_form_metadata(data: MultiDict) -> Metadata:
362394
# way this function is implemented, our `TypedDict` can only have valid key
363395
# names.
364396
return Metadata.from_raw(typing.cast(RawMetadata, raw))
397+
398+
399+
# Translation table that deletes every ASCII punctuation and whitespace
# character. Built once at import time rather than on every call.
_PROJECT_URL_LABEL_REMOVALS = str.maketrans(
    "", "", string.punctuation + string.whitespace
)


def normalize_project_url_label(label: str) -> str:
    """
    Normalize a Project-URL label according to the label normalization
    rules in the "Well-Known Project URLs in Metadata" specification:
    <https://packaging.python.org/en/latest/specifications/well-known-project-urls/#label-normalization>

    All ASCII punctuation and whitespace characters are removed and the
    remainder is lowercased, so e.g. "Home-page", "Home Page" and "HOMEPAGE"
    all normalize to "homepage".

    :param label: The raw label of a project URL.
    :return: The normalized label; empty if ``label`` contains nothing but
        ASCII punctuation and whitespace.
    """
    return label.translate(_PROJECT_URL_LABEL_REMOVALS).lower()

warehouse/packaging/models.py

Lines changed: 8 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
from warehouse.authnz import Permissions
6767
from warehouse.classifiers.models import Classifier
6868
from warehouse.events.models import HasEvents
69+
from warehouse.forklift import metadata
6970
from warehouse.integrations.vulnerabilities.models import VulnerabilityRecord
7071
from warehouse.observations.models import HasObservations
7172
from warehouse.organizations.models import (
@@ -559,35 +560,7 @@ class ReleaseURL(db.Model):
559560

560561

561562
DynamicFieldsEnum = ENUM(
562-
"Platform",
563-
"Supported-Platform",
564-
"Summary",
565-
"Description",
566-
"Description-Content-Type",
567-
"Keywords",
568-
"Home-Page", # Deprecated, but technically permitted by PEP 643
569-
"Download-Url", # Deprecated, but technically permitted by PEP 643
570-
"Author",
571-
"Author-Email",
572-
"Maintainer",
573-
"Maintainer-Email",
574-
"License",
575-
"License-Expression",
576-
"License-File",
577-
"Classifier",
578-
"Requires-Dist",
579-
"Requires-Python",
580-
"Requires-External",
581-
"Project-Url",
582-
"Provides-Extra",
583-
"Provides-Dist",
584-
"Obsoletes-Dist",
585-
# Although the following are deprecated fields, they are technically
586-
# permitted as dynamic by PEP 643
587-
# https://github.com/pypa/setuptools/issues/4797#issuecomment-2589514950
588-
"Requires",
589-
"Provides",
590-
"Obsoletes",
563+
*metadata.DYNAMIC_FIELDS,
591564
name="release_dynamic_fields",
592565
)
593566

@@ -763,10 +736,12 @@ def urls(self):
763736
_urls["Download"] = self.download_url
764737

765738
for name, url in self.project_urls.items():
766-
# avoid duplicating homepage/download links in case the same
767-
# url is specified in the pkginfo twice (in the Home-page
768-
# or Download-URL field and again in the Project-URL fields)
769-
comp_name = name.casefold().replace("-", "").replace("_", "")
739+
# NOTE: This avoids duplicating the homepage and download URL links
740+
# if they're present both as project URLs and as standalone fields.
741+
# The deduplication is done with the project label normalization rules
742+
# adopted with PEP 753.
743+
# See https://peps.python.org/pep-0753/
744+
comp_name = metadata.normalize_project_url_label(name)
770745
if comp_name == "homepage" and url == _urls.get("Homepage"):
771746
continue
772747
if comp_name == "downloadurl" and url == _urls.get("Download"):

0 commit comments

Comments
 (0)