|
| 1 | +# |
| 2 | +# Copyright (c) nexB Inc. and others. All rights reserved. |
| 3 | +# purldb is a trademark of nexB Inc. |
| 4 | +# SPDX-License-Identifier: Apache-2.0 |
| 5 | +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. |
| 6 | +# See https://github.com/aboutcode-org/purldb for support or download. |
| 7 | +# See https://aboutcode.org for more information about nexB OSS projects. |
| 8 | +# |
| 9 | + |
| 10 | + |
| 11 | +import json |
| 12 | +import requests |
| 13 | + |
| 14 | +from packageurl import PackageURL |
| 15 | + |
| 16 | +from minecode_pipeline.utils import get_temp_file |
| 17 | + |
| 18 | +""" |
| 19 | +Visitors for Pypi and Pypi-like Python package repositories. |
| 20 | +
|
| 21 | +We have this hierarchy in Pypi simple/ index: |
| 22 | + pypi projects (JSON/HTML) -> project versions (JSON/HTML) -> download urls |
| 23 | +
|
| 24 | +https://pypi.org/simple/ |
| 25 | +Pypi serves a main index via JSON/HTML API that contains a list of package names |
| 26 | +and some info on when a package was updated by releasing a new version. |
| 27 | +See https://docs.pypi.org/api/index-api/ for more details. |
| 28 | +This index also has a list of versions and download URLs of all |
| 29 | +uploaded/available package archives and some basic metadata. |
| 30 | +
|
| 31 | +https://pypi.org/pypi/{name}/json |
| 32 | +For each package, a JSON contains details including the list of all releases |
| 33 | +and archives, their URLs, and some metadata for each release. |
| 34 | +For each release, a JSON contains details for the released version and all the |
| 35 | +downloads available for this release. |
| 36 | +""" |
| 37 | + |
| 38 | + |
# Accept header sent with every index request in this module to ask for the
# JSON form of the index API (see https://docs.pypi.org/api/index-api/).
pypi_json_headers = {"Accept": "application/vnd.pypi.simple.v1+json"}


# Base URL of the index; a project name is appended to it to reach the
# per-project index page.
PYPI_REPO = "https://pypi.org/simple/"
# Package URL "type" used when building PackageURLs in this module.
PYPI_TYPE = "pypi"
| 44 | + |
| 45 | + |
def get_pypi_packages(pypi_repo, logger=None):
    """
    Fetch the package index served at ``pypi_repo`` as JSON and save it to a
    temporary file.

    Return the path of the temporary file holding the indented JSON index, or
    None when the server answers with a non-OK HTTP status.

    ``logger`` is optional. When it is a callable, it is called with a single
    message string if the request fails with a non-OK status; any other value
    is ignored.
    """
    # Without a timeout, requests waits forever on a stalled connection.
    # The value applies to connect and to each socket read, not to the whole
    # download, so it does not cap the time needed to fetch a large index.
    response = requests.get(pypi_repo, headers=pypi_json_headers, timeout=60)
    if not response.ok:
        if callable(logger):
            logger(
                f"Failed to fetch package index from {pypi_repo}: "
                f"HTTP {response.status_code}"
            )
        return None

    packages = response.json()
    temp_file = get_temp_file("PypiPackagesJSON")
    with open(temp_file, "w", encoding="utf-8") as f:
        json.dump(packages, f, indent=4)

    return temp_file
| 57 | + |
| 58 | + |
def get_pypi_packageurls(name):
    """
    Return a list of Package URL strings, one per version listed for the
    project ``name`` in the per-project index page under PYPI_REPO.

    Return an empty list when the request answers with a non-OK HTTP status
    or when the response carries no "versions" entry.
    """
    project_index_api_url = PYPI_REPO + name
    # Without a timeout, requests waits forever on a stalled connection.
    response = requests.get(
        project_index_api_url,
        headers=pypi_json_headers,
        timeout=60,
    )
    if not response.ok:
        return []

    project_data = response.json()
    # "versions" may be missing or null in the response; treat that as
    # "no versions" instead of failing on iteration over None.
    versions = project_data.get("versions") or []

    return [
        PackageURL(type=PYPI_TYPE, name=name, version=version).to_string()
        for version in versions
    ]
| 77 | + |
| 78 | + |
def load_pypi_packages(packages):
    """
    Load a saved package index JSON file and return a two-tuple of
    ``(last_serial, projects)``.

    ``packages`` is the path to a JSON file holding a mapping. ``last_serial``
    is the value stored under "meta" -> "_last-serial" and ``projects`` is the
    value stored under "projects". Either one is None when the corresponding
    key is absent from the file.
    """
    # Read as UTF-8 explicitly so decoding does not depend on the locale's
    # default encoding; get_pypi_packages writes its file as UTF-8.
    with open(packages, encoding="utf-8") as f:
        packages_data = json.load(f)

    # A missing or null "meta" section yields a None serial instead of an
    # AttributeError.
    meta = packages_data.get("meta") or {}
    last_serial = meta.get("_last-serial")
    projects = packages_data.get("projects")

    return last_serial, projects
0 commit comments