|
| 1 | +# |
| 2 | +# Copyright (c) nexB Inc. and others. All rights reserved. |
| 3 | +# purldb is a trademark of nexB Inc. |
| 4 | +# SPDX-License-Identifier: Apache-2.0 |
| 5 | +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. |
| 6 | +# See https://github.com/aboutcode-org/purldb for support or download. |
| 7 | +# See https://aboutcode.org for more information about nexB OSS projects. |
| 8 | +# |
| 9 | + |
| 10 | +import requests |
| 11 | +from packagedcode import models as scan_models |
| 12 | + |
| 13 | + |
def build_packages(metadata_dict, purl):
    """
    Yield packages built from crates.io metadata, one per matching version
    that has a download path.

    ``metadata_dict`` is a mapping of crate metadata. Only two of its
    entries are read here:

    - ``metadata_dict["versions"]``: a list of per-version mappings with the
      keys "num", "description", "crate", "homepage", "repository",
      "license", "published_by", "dl_path", "crate_size" and "checksum".
    - ``metadata_dict["crate"]["keywords"]``: the keywords stored on each
      yielded package.

    ``purl`` is a package URL object (not a plain string): its ``version``
    attribute is read and the object is passed to ``set_purl()`` on each
    yielded package. When ``purl.version`` is truthy, only the version with
    that exact number is considered; otherwise all versions are.

    Versions without a "dl_path" are skipped. For the others, a HEAD request
    is sent to crates.io to resolve the final download URL, so any exception
    raised by ``requests`` (including a timeout) propagates to the consumer
    of this generator.
    """
    purl_version = purl.version
    for version_info in metadata_dict["versions"]:
        version = version_info["num"]
        if purl_version and purl_version != version:
            continue

        extracted_license_statement = []
        lic = version_info["license"]
        if lic and lic != "UNKNOWN":
            extracted_license_statement.append(lic)

        # mapping of information that are common to all the downloads of a
        # version
        common_data = dict(
            name=version_info["crate"],
            version=version,
            description=version_info["description"],
            homepage_url=version_info["homepage"],
            repository_homepage_url=version_info["repository"],
            extracted_license_statement=extracted_license_statement,
            keywords=metadata_dict["crate"]["keywords"],
        )

        # The author lookup is scoped to the publisher check so that a
        # version without publisher data can neither hit an unbound name nor
        # inherit the author of a previously processed version.
        published_by = version_info["published_by"]
        if published_by:
            # Prefer the display name and fall back to the login.
            author = published_by["name"] or published_by["login"]
            if author:
                common_data["parties"] = [
                    scan_models.Party(name=author, role="author")
                ]

        download_path = version_info["dl_path"]
        if not download_path:
            continue

        # As the download path consistently ends with "/download" (e.g.,
        # "/api/v1/crates/purl/0.1.5/download"), we need to obtain the
        # redirected URL to ensure the filename is not simply "download".
        # The leading slash is stripped to avoid building
        # "https://crates.io//api/...".
        download_url = "https://crates.io/" + download_path.lstrip("/")
        # A timeout keeps a stalled connection from blocking forever.
        response = requests.head(
            download_url,
            allow_redirects=True,
            timeout=30,
        )
        download_url = response.url

        download_data = dict(
            datasource_id="cargo_pkginfo",
            type="cargo",
            download_url=download_url,
            size=version_info["crate_size"],
            sha256=version_info["checksum"],
        )
        download_data.update(common_data)
        package = scan_models.PackageData.from_data(download_data)

        package.datasource_id = "cargo_api_metadata"
        package.set_purl(purl)
        yield package
0 commit comments