Skip to content

Commit 8e3caa9

Browse files
authored
update parsed verifysdist and verifywhl to check metadata (#42619)
* add metadata extraction to ParsedSetup
* add check to verify sdist metadata
* add verify against prior version metadata
* normalize metadata between PEP 621 and PEP 566
* verify whl metadata
* clean up
* more clean up
* cleanup 3
* copilot comments
* add tests
* address comment
* nit
* uncomment tests
1 parent 6095433 commit 8e3caa9

File tree

25 files changed

+718
-40
lines changed

25 files changed

+718
-40
lines changed

eng/tools/azure-sdk-tools/ci_tools/parsing/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
get_ci_config,
1212
get_version_py,
1313
get_pyproject,
14+
extract_package_metadata,
1415
VERSION_REGEX,
1516
VERSION_PY,
1617
OLD_VERSION_PY
@@ -29,6 +30,7 @@
2930
"get_ci_config",
3031
"get_version_py",
3132
"get_pyproject",
33+
"extract_package_metadata",
3234
"VERSION_REGEX",
3335
"VERSION_PY",
3436
"OLD_VERSION_PY"

eng/tools/azure-sdk-tools/ci_tools/parsing/parse_functions.py

Lines changed: 181 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# otherwise fall back to pypi package tomli
1313
import tomli as toml
1414

15-
from typing import Dict, List, Tuple, Any, Optional
15+
from typing import Dict, List, Tuple, Any, Optional, Union
1616

1717
# this assumes the presence of "packaging"
1818
from packaging.requirements import Requirement
@@ -50,20 +50,179 @@
5050
}
5151

5252

53+
def extract_package_metadata(package_path: str) -> Dict[str, Any]:
    """Read package metadata from a built package or source directory into a plain dict.

    The metadata object returned by ``pkginfo.get_metadata`` is flattened into a
    dictionary. Only attributes with a truthy value are copied, so absent or
    empty fields never appear as keys.

    :param str package_path: Path handed to ``pkginfo.get_metadata``.
    :rtype: Dict[str, Any]
    :return: The normalized metadata, or an empty dict when ``get_metadata``
        returns nothing or any exception is raised while reading it (the
        exception is logged as a warning). The ``pkginfo`` import itself happens
        outside the ``try`` block, so a missing ``pkginfo`` still raises.
    """
    from pkginfo import get_metadata

    # Attributes copied verbatim under their own name; the tuple order is the
    # insertion order of the resulting dictionary.
    direct_fields = (
        'name',
        'version',
        'keywords',
        'summary',
        'description',
        'classifiers',
        'requires_python',
        'requires_dist',
    )

    try:
        # Note: metadata may differ between a source directory and a built package,
        # since some of it may be normalized/transformed during the build process.
        pkg_info = get_metadata(package_path)
        if not pkg_info:
            return {}

        metadata: Dict[str, Any] = {}
        for field_name in direct_fields:
            field_value = getattr(pkg_info, field_name)
            if field_value:
                metadata[field_name] = field_value

        # Fields that need reshaping are delegated to dedicated helpers.
        for role in ('author', 'maintainer'):
            _normalize_person_fields(pkg_info, metadata, role)
        _normalize_license_field(pkg_info, metadata)
        _normalize_url_fields(pkg_info, metadata)
        _add_optional_fields(pkg_info, metadata)

        return metadata
    except Exception as e:
        logging.warning(f"Error extracting metadata from {package_path}: {e}")
        return {}
110+
111+
112+
def _normalize_person_fields(pkg_info, metadata: Dict[str, Any], role: str) -> None:
    """Copy the ``role`` (author/maintainer) name and email into ``metadata``.

    Three input shapes are handled:

    * name and email both present: each is stored as-is;
    * only an email present: a ``"Name <email>"`` value is split into the two
      keys, anything else is stored unchanged under ``<role>_email``;
    * only a name present: stored under ``<role>``.

    :param pkg_info: Object exposing ``<role>`` and ``<role>_email`` attributes
        (missing attributes are treated as empty).
    :param metadata: Dictionary updated in place.
    :param str role: Attribute prefix, e.g. ``'author'`` or ``'maintainer'``.
    """
    email_key = f'{role}_email'
    person = getattr(pkg_info, role, None)
    address = getattr(pkg_info, email_key, None)

    # No email at all: at most the bare name can be recorded.
    if not address:
        if person:
            metadata[role] = person
        return

    # Separate name and email fields: keep both untouched.
    if person:
        metadata[role] = person
        metadata[email_key] = address
        return

    # Email only: the name may be embedded as "Name <email>".
    combined = None
    if '<' in address and '>' in address:
        combined = re.match(r'^(.+?)\s*<(.+?)>$', address.strip())

    if combined is None:
        metadata[email_key] = address
        return

    metadata[role] = combined.group(1).strip()
    metadata[email_key] = combined.group(2).strip()
139+
140+
141+
def _normalize_license_field(pkg_info, metadata: Dict[str, Any]) -> None:
    """Store a single ``license`` value in ``metadata`` from whichever source is set.

    Exactly one source is chosen, in order of preference:

    1. ``license_expression`` (PEP 639 SPDX expression),
    2. ``license`` (legacy free-text field),
    3. ``license_file`` (license file references), used only as a fallback.

    Previously each source unconditionally overwrote the one before it, so a
    package that declared license files ended up with the file list (for
    example ``['LICENSE']``) in place of its actual license. The ``license``
    key is still present whenever any of the three sources is set.

    :param pkg_info: Object that may expose ``license``, ``license_expression``
        and ``license_file`` attributes; missing attributes are treated as empty.
    :param metadata: Dictionary updated in place.
    """
    for attr_name in ('license_expression', 'license', 'license_file'):
        license_value = getattr(pkg_info, attr_name, None)
        if license_value:
            metadata['license'] = license_value
            return
151+
152+
153+
def _normalize_url_fields(pkg_info, metadata: Dict[str, Any]) -> None:
    """Copy homepage, project URLs and download URL from ``pkg_info`` into ``metadata``.

    ``home_page`` is stored under ``homepage``. ``project_urls`` is stored
    unchanged and, when no ``homepage`` key exists yet, is also searched for a
    homepage-like entry. ``download_url`` is optional on ``pkg_info``.

    :param pkg_info: Object exposing ``home_page`` and ``project_urls``
        attributes, and optionally ``download_url``.
    :param metadata: Dictionary updated in place.
    """
    home_page = pkg_info.home_page
    if home_page:
        metadata['homepage'] = home_page

    urls = pkg_info.project_urls
    if urls:
        metadata['project_urls'] = urls

        # Fall back to the project URLs only when no homepage is known yet.
        if 'homepage' not in metadata:
            derived_homepage = _extract_homepage_from_project_urls(urls)
            if derived_homepage:
                metadata['homepage'] = derived_homepage

    download_url = getattr(pkg_info, 'download_url', None)
    if download_url:
        metadata['download_url'] = download_url
172+
173+
174+
def _extract_homepage_from_project_urls(project_urls) -> Optional[str]:
    """Return the first homepage-like URL found in ``project_urls``.

    Two layouts are understood:

    * a list/tuple of ``"Label, URL"`` strings (split on the first comma);
    * a dict mapping label to URL.

    Labels are compared case-insensitively against ``homepage``, ``home-page``,
    ``home`` and ``website``. Any other input type, or no matching label,
    yields ``None``.

    :param project_urls: Project URLs in one of the layouts above.
    :rtype: str or None
    :return: The matching URL, or None when nothing matches.
    """
    homepage_labels = ['homepage', 'home-page', 'home', 'website']

    if not project_urls:
        return None

    if isinstance(project_urls, (list, tuple)):
        for entry in project_urls:
            if not isinstance(entry, str):
                continue
            # "Homepage, https://example.com" -> ("Homepage", ",", " https://example.com")
            label, comma, target = entry.partition(',')
            if comma and label.strip().lower() in homepage_labels:
                return target.strip()
        return None

    if isinstance(project_urls, dict):
        return next(
            (target for label, target in project_urls.items() if label.lower() in homepage_labels),
            None,
        )

    return None
196+
197+
198+
def _add_optional_fields(pkg_info, metadata: Dict[str, Any]) -> None:
    """Copy the optional metadata attributes that are set on ``pkg_info``.

    The attributes ``obsoletes_dist``, ``provides_dist``, ``requires_external``,
    ``platform`` and ``supported_platform`` are each copied under the same key
    when present with a truthy value. Missing or empty attributes are skipped.

    :param pkg_info: Object that may expose any of the attributes above.
    :param metadata: Dictionary updated in place.
    """
    for attr_name in (
        'obsoletes_dist',
        'provides_dist',
        'requires_external',
        'platform',
        'supported_platform',
    ):
        attr_value = getattr(pkg_info, attr_name, None)
        if attr_value:
            metadata[attr_name] = attr_value
210+
211+
53212
def discover_namespace(package_root_path: str) -> Optional[str]:
54213
"""
55214
Discover the true namespace of a package by walking through its directory structure
56215
and finding the first __init__.py that contains actual content (not just namespace extension).
57-
216+
58217
:param str package_root_path: Root path of the package directory
59218
:rtype: str or None
60219
:return: The discovered namespace string, or None if no suitable namespace found
61220
"""
62221
if not os.path.exists(package_root_path):
63222
return None
64-
223+
65224
namespace = None
66-
225+
67226
for root, subdirs, files in os.walk(package_root_path):
68227
# Ignore any modules with name starts with "_"
69228
# For e.g. _generated, _shared etc
@@ -73,26 +232,26 @@ def discover_namespace(package_root_path: str) -> Optional[str]:
73232
for d in dirs_to_skip:
74233
logging.debug("Dirs to skip: {}".format(dirs_to_skip))
75234
subdirs.remove(d)
76-
235+
77236
if INIT_PY_FILE in files:
78237
module_name = os.path.relpath(root, package_root_path).replace(
79238
os.path.sep, "."
80239
)
81-
240+
82241
# If namespace has not been set yet, try to find the first __init__.py that's not purely for extension.
83242
if not namespace:
84243
namespace = _set_root_namespace(
85244
os.path.join(root, INIT_PY_FILE), module_name
86245
)
87-
246+
88247
return namespace
89248

90249

91250
def _set_root_namespace(init_file_path: str, module_name: str) -> Optional[str]:
92251
"""
93252
Examine an __init__.py file to determine if it represents a substantial namespace
94253
or is just a namespace extension file.
95-
254+
96255
:param str init_file_path: Path to the __init__.py file
97256
:param str module_name: The module name corresponding to this __init__.py
98257
:rtype: str or None
@@ -111,16 +270,16 @@ def _set_root_namespace(init_file_path: str, module_name: str) -> Optional[str]:
111270
# If comment, skip line. Otherwise, add to content.
112271
if not in_docstring and not stripped_line.startswith("#"):
113272
content.append(line)
114-
273+
115274
# If there's more than one line of content, or if there's one line that's not just namespace extension
116275
if len(content) > 1 or (
117276
len(content) == 1 and INIT_EXTENSION_SUBSTRING not in content[0]
118277
):
119278
return module_name
120-
279+
121280
except Exception as e:
122281
logging.error(f"Error reading {init_file_path}: {e}")
123-
282+
124283
return None
125284

126285

@@ -168,7 +327,8 @@ def __init__(
168327
@classmethod
169328
def from_path(cls, parse_directory_or_file: str):
170329
"""
171-
Creates a new ParsedSetup instance from a path to a setup.py, pyproject.toml (with [project] member), or a directory containing either of those files.
330+
Creates a new ParsedSetup instance from a path to a setup.py, pyproject.toml (with [project] member),
331+
or a directory containing either of those files.
172332
"""
173333
(
174334
name,
@@ -370,9 +530,9 @@ def setup(*args, **kwargs):
370530

371531
fixed = ast.fix_missing_locations(parsed)
372532
codeobj = compile(fixed, setup_filename, "exec")
373-
local_vars = {}
533+
local_vars: Dict[str, Any] = {}
374534
kwargs = {}
375-
global_vars = {"__setup_calls__": []}
535+
global_vars: Dict[str, Any] = {"__setup_calls__": []}
376536
current_dir = os.getcwd()
377537
working_dir = os.path.dirname(setup_filename)
378538
os.chdir(working_dir)
@@ -390,7 +550,7 @@ def setup(*args, **kwargs):
390550

391551
version = kwargs.get("version")
392552
name = kwargs.get("name")
393-
name_space = name.replace("-", ".")
553+
name_space = name.replace("-", ".") if name else ""
394554
packages = kwargs.get("packages", [])
395555

396556
if packages:
@@ -588,11 +748,12 @@ def parse_setup(
588748
<keywords>,
589749
<ext_packages>,
590750
<ext_modules>,
591-
<is_metapackage>
751+
<is_metapackage>,
592752
)
593753
594754
If a pyproject.toml (containing [project]) or a setup.py is NOT found, a ValueError will be raised.
595755
"""
756+
596757
targeted_path = setup_filename_or_folder
597758
if os.path.isfile(setup_filename_or_folder):
598759
targeted_path = os.path.dirname(setup_filename_or_folder)
@@ -602,9 +763,11 @@ def parse_setup(
602763
raise ValueError(f"Unable to find a setup.py or pyproject.toml in {setup_filename_or_folder}")
603764

604765
if resolved_filename.endswith(".toml"):
605-
return parse_pyproject(resolved_filename)
766+
result = parse_pyproject(resolved_filename)
606767
else:
607-
return parse_setup_py(resolved_filename)
768+
result = parse_setup_py(resolved_filename)
769+
770+
return result
608771

609772

610773
def get_pyproject_dict(pyproject_file: str) -> Dict[str, Any]:

eng/tools/azure-sdk-tools/pypi_tools/pypi.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,20 @@ def get_relevant_versions(self, package_name):
8181
versions = self.get_ordered_versions(package_name)
8282
stable_releases = [version for version in versions if not version.is_prerelease]
8383
return (versions[-1], stable_releases[-1])
84+
85+
def retrieve_versions_from_pypi(package_name: str) -> List[str]:
    """
    Retrieve every version published on PyPI for the package, as strings.

    :param str package_name: The name of the package.
    :rtype: List[str]
    :return: The version strings, in the order produced by
        ``PyPIClient.get_ordered_versions``. An empty list is returned when the
        lookup or the conversion raises; the failure is logged as a warning.
    """
    try:
        ordered_versions = PyPIClient().get_ordered_versions(package_name)
        # Conversion stays inside the try block so any failure here is also
        # reported as "no versions" instead of propagating.
        return list(map(str, ordered_versions))
    except Exception as ex:
        logging.warning("Failed to retrieve PyPI data for %s: %s", package_name, ex)
        return []
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
This folder contains a beta release `pyproject.toml` scenario for azure-mgmt-planetarycomputer with invalid metadata (missing author name and homepage). We use this to ensure that our metadata verification within ParsedSetup properly detects missing required fields for beta packages.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# --------------------------------------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License. See License.txt in the project root for license information.
4+
# --------------------------------------------------------------------------------------------
5+
6+
VERSION = "1.0.0b2"
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
[build-system]
2+
requires = ["setuptools>=77.0.3", "wheel"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[project]
6+
name = "azure-mgmt-planetarycomputer"
7+
authors = [
8+
{name = "Microsoft Corporation"},
9+
]
10+
keywords = ["azure", "azure sdk"]
11+
requires-python = ">=3.9"
12+
license = "MIT"
13+
classifiers = [
14+
"Development Status :: 4 - Beta",
15+
"Programming Language :: Python",
16+
"Programming Language :: Python :: 3 :: Only",
17+
"Programming Language :: Python :: 3",
18+
"Programming Language :: Python :: 3.9",
19+
"Programming Language :: Python :: 3.10",
20+
"Programming Language :: Python :: 3.11",
21+
"Programming Language :: Python :: 3.12",
22+
]
23+
dependencies = [
24+
"isodate>=0.6.1",
25+
"typing-extensions>=4.6.0",
26+
"azure-common>=1.1",
27+
"azure-mgmt-core>=1.5.0"
28+
]
29+
dynamic = ["version", "readme"]
30+
31+
[project.urls]
32+
Homepage = "https://github.com/Azure/azure-sdk-for-python"
33+
34+
[tool.setuptools]
35+
include-package-data = true
36+
37+
[tool.setuptools.packages.find]
38+
exclude = ["tests", "azure", "azure.mgmt"]
39+
40+
[tool.setuptools.package-data]
41+
pytyped = ["py.typed"]
42+
43+
[tool.setuptools.dynamic]
44+
version = {attr = "azure.mgmt.planetarycomputer._version.VERSION"}
45+
readme = {file = "README.md", content-type = "text/markdown"}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
This folder contains a basic beta release `pyproject.toml` scenario for azure-mgmt-planetarycomputer. We use this to ensure that our metadata verification within ParsedSetup is working properly for beta packages.

0 commit comments

Comments
 (0)