|
1 | 1 | """Various checks that require network support.""" |
2 | 2 |
|
| 3 | +import re |
3 | 4 | import socket |
4 | 5 | import traceback |
5 | 6 | import urllib.request |
@@ -337,3 +338,125 @@ def _get_urls(self, pkg): |
337 | 338 |
|
338 | 339 | def schedule(self, pkgs, *args, **kwargs): |
339 | 340 | super().schedule(pkgs[-1], *args, **kwargs) |
| 341 | + |
| 342 | + |
class PyPIAttestationAvailable(results.VersionResult, results.Info):
    """PyPI attestation can be used for the package."""

    def __init__(self, filename, **kwargs):
        # Remaining keyword arguments are handed to the result base classes.
        super().__init__(**kwargs)
        # Name of the distfile for which a provenance URL was found.
        self.filename = filename

    @property
    def desc(self):
        """Describe the finding, naming the affected distfile."""
        distfile = self.filename
        advice = "Consider adding PYPI_VERIFY_REPO."
        return f"PyPI attestation is available for distfile {distfile}. {advice}"
| 357 | + |
class PyPIAttestationAvailableCheck(NetworkCheck):
    """Check for available PyPI attestations.

    For every ``.tar.gz`` source distfile fetched from
    ``files.pythonhosted.org``, the matching ``pypi.org/integrity/...``
    provenance URL is probed with a HEAD request. A request that raises no
    error is reported as ``PyPIAttestationAvailable``.
    """

    required_addons = (addons.UseAddon,)

    _source = sources.LatestVersionRepoSource

    known_results = frozenset(
        {
            PyPIAttestationAvailable,
            SSLCertificateError,
        }
    )

    # Matches sdist URLs on files.pythonhosted.org and captures the project
    # directory, the full file name and the version embedded in the file name.
    pypi_uri_re = re.compile(
        r"^https://files\.pythonhosted\.org/packages/source/./(?P<project>.+?)/"
        r"(?P<filename>[^/]+-(?P<version>[^/]+)\.tar\.gz)$"
    )

    def __init__(self, *args, use_addon, **kwargs):
        super().__init__(*args, **kwargs)
        self.fetch_filter = use_addon.get_filter("fetchables")

    def _provenance_check(self, filename, url, *, pkg):
        """Probe a provenance URL and translate the outcome into a result.

        Args:
            filename: distfile name the provenance URL belongs to.
            url: provenance URL to request.
            pkg: package the result is reported against.

        Returns:
            ``PyPIAttestationAvailable`` when the HEAD request raises no
            error, ``SSLCertificateError`` when ``SSLError`` is raised, and
            ``None`` when ``RequestError`` is raised.
        """
        try:
            self.session.head(url, allow_redirects=False)
        except SSLError as e:
            # Handled before RequestError so the SSL case is always reported,
            # whether or not the two exception types are related.
            # The provenance URL is derived from a SRC_URI entry, hence the
            # attribute name passed here.
            return SSLCertificateError("SRC_URI", url, str(e), pkg=pkg)
        except RequestError:
            # Deliberate best-effort: a failed request yields no result.
            return None
        return PyPIAttestationAvailable(filename, pkg=pkg)

    def task_done(self, pkg, filename, future):
        """Determine the result of a given URL verification task.

        Args:
            pkg: package to re-target the result at, or ``None`` to keep the
                result exactly as the worker produced it.
            filename: distfile name to use when re-targeting the result.
            future: completed future of a ``_provenance_check`` call.
        """
        exc = future.exception()
        if exc is not None:
            # A traceback object can't be pickled, so serialize it. This runs
            # in a done-callback (outside any ``except`` block), so the text
            # must be built from the exception object itself.
            tb = "".join(traceback.format_exception(type(exc), exc, exc.__traceback__))
            # return exceptions that occurred in threads
            self.results_q.put(tb)
            return

        result = future.result()
        if result is None:
            return

        if pkg is not None:
            # recreate result object with a different pkg target
            attrs = result._attrs.copy()
            # Only results that already carry a filename get it replaced.
            # NOTE(review): assumes ``_attrs`` holds the result's constructor
            # attributes, so other result types must not gain a stray key.
            if "filename" in attrs:
                attrs["filename"] = filename
            result = result._create(**attrs, pkg=pkg)
        self.results_q.put([result])

    def _schedule_check(self, filename, url, executor, futures, **kwargs):
        """Schedule verification method to run in a separate thread against a given URL.

        Note that this tries to avoid hitting the network for the same URL
        twice using a mapping from requested URLs to future objects, adding
        result-checking callbacks to the futures of existing URLs.
        """
        future = futures.get(url)
        if future is None:
            future = executor.submit(self._provenance_check, filename, url, **kwargs)
            # The first requester's result already targets the right package
            # and filename, so no re-targeting is requested.
            future.add_done_callback(partial(self.task_done, None, None))
            futures[url] = future
        else:
            future.add_done_callback(partial(self.task_done, kwargs["pkg"], filename))

    def _get_urls(self, pkg):
        """Yield ``(distfile name, provenance URL)`` pairs for PyPI sdists of pkg."""
        # ignore conditionals
        fetchables, _ = self.fetch_filter(
            (fetchable,),
            pkg,
            pkg.generate_fetchables(
                allow_missing_checksums=True,
                ignore_unknown_mirrors=True,
                skip_default_mirrors=True,
            ),
        )
        for f in fetchables.keys():
            for url in f.uri:
                if m := self.pypi_uri_re.match(url):
                    provenance_url = (
                        f"https://pypi.org/integrity/{m.group('project')}/"
                        f"v{m.group('version')}/{m.group('filename')}/provenance"
                    )
                    yield (f.filename, provenance_url)

    def schedule(self, pkg, executor, futures):
        """Schedule verification methods to run in separate threads for all flagged URLs."""

        # short-circuit for packages not using pypi.eclass
        # (they will be reported separately as missing the eclass)
        if "pypi" not in pkg.inherited:
            return

        # skip ebuilds that enable attestations already
        with pkg.ebuild.bytes_fileobj() as f:
            if any(line.startswith(b"PYPI_VERIFY_REPO=") for line in f):
                return

        for filename, url in self._get_urls(pkg):
            self._schedule_check(filename, url, executor, futures, pkg=pkg)
0 commit comments