Skip to content

Commit 2a05e4f

Browse files
committed
Create minecode_pipeline module for mine cargo
Signed-off-by: ziad hany <[email protected]>
1 parent 3428caf commit 2a05e4f

File tree

8 files changed

+217
-0
lines changed

8 files changed

+217
-0
lines changed

minecode_pipeline/README.rst

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
minecode-pipeline
2+
===================
3+
4+
minecode-pipeline is an add-on library that works with ScanCode.io to define pipelines that mine
5+
Package-URLs (purls) and package metadata from ecosystem repositories and APIs.
6+
7+
Installation
8+
------------
9+
10+
Requirements
11+
############
12+
13+
* install minecode-pipeline dependencies
14+
* ``pip install minecode-pipeline``
15+
16+
17+
Funding
18+
-------
19+
20+
This project was funded through the NGI Assure Fund https://nlnet.nl/assure, a
21+
fund established by NLnet https://nlnet.nl/ with financial support from the
22+
European Commission's Next Generation Internet programme, under the aegis of DG
23+
Communications Networks, Content and Technology under grant agreement No 957073.
24+
25+
This project is also funded through grants from the Google Summer of Code
26+
program, continuing support and sponsoring from nexB Inc. and generous
27+
donations from multiple sponsors.
28+
29+
30+
License
31+
-------
32+
33+
Copyright (c) nexB Inc. and others. All rights reserved.
34+
35+
purldb is a trademark of nexB Inc.
36+
37+
SPDX-License-Identifier: Apache-2.0
38+
39+
minecode-pipeline is licensed under the Apache License version 2.0.
40+
41+
See https://www.apache.org/licenses/LICENSE-2.0 for the license text.
42+
See https://github.com/aboutcode-org/purldb for support or download.
43+
See https://aboutcode.org for more information about nexB OSS projects.

minecode_pipeline/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
22+
import json
23+
from pathlib import Path
24+
25+
from minecode_pipeline.pipes import cargo
26+
from scanpipe.pipelines.publish_to_federatedcode import PublishToFederatedCode
27+
from fetchcode.vcs import fetch_via_vcs
28+
29+
30+
class MineCargo(PublishToFederatedCode):
    """
    Pipeline to mine Cargo (crates.io) packages and publish them to FederatedCode.

    The crates.io index repository is cloned locally, each package index file
    is parsed, and the parsed entries are handed to
    ``cargo.collect_packages_from_cargo``.
    """

    # Location of the crates.io registry index, in the "git+<url>" form that
    # is passed to fetch_via_vcs.
    repo_url = "git+https://github.com/rust-lang/crates.io-index"

    # File names found in the index checkout that are not package index files.
    _NON_PACKAGE_FILES = frozenset({"config.json", "README.md", "update-dl-url.yml"})

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps."""
        return (
            cls.check_federatedcode_eligibility,
            cls.clone_cargo_index,
            cls.clone_repository,
            cls.collect_packages_from_cargo,
            cls.delete_local_clone,
        )

    def clone_cargo_index(self, repo_url=None):
        """
        Clone the crates.io index and store the fetch response on
        ``self.vcs_response``.

        ``repo_url`` defaults to the class-level ``repo_url`` so that this
        method can run as a pipeline step, which is listed in ``steps`` without
        any argument.
        """
        self.vcs_response = fetch_via_vcs(repo_url or self.repo_url)

    def _iter_index_files(self, base_path):
        """Yield the package index files found under ``base_path``."""
        for file_path in base_path.glob("**/*"):
            if not file_path.is_file():
                continue
            if file_path.name in self._NON_PACKAGE_FILES:
                continue
            # Anything under a hidden directory (.git, .github, ...) is
            # repository housekeeping, not package data.
            relative_parts = file_path.relative_to(base_path).parts
            if any(part.startswith(".") for part in relative_parts):
                continue
            yield file_path

    @staticmethod
    def _load_index_entries(file_path):
        """
        Return the list of package version mappings stored in ``file_path``.

        A crates.io index file holds one JSON object per line (one line per
        published version), so the file is parsed line by line rather than as
        a single JSON document. A line holding a JSON list is flattened.
        Return an empty list if the file cannot be decoded or parsed.
        """
        entries = []
        try:
            with open(file_path, encoding="utf-8") as index_file:
                for line in index_file:
                    line = line.strip()
                    if not line:
                        continue
                    record = json.loads(line)
                    records = record if isinstance(record, list) else [record]
                    entries.extend(item for item in records if isinstance(item, dict))
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Best effort: a file that is not valid index data is skipped.
            return []
        return entries

    def collect_packages_from_cargo(self):
        """
        Parse every package index file of the cloned index and pass its
        entries to ``cargo.collect_packages_from_cargo``.

        Only the last file that actually yields entries is processed with
        ``push_commit=True``, so a trailing unreadable or empty file cannot
        prevent the final commit/push from happening.
        """
        base_path = Path(self.vcs_response.dest_dir)

        # Materialize the file list before processing so that files written
        # while processing cannot affect the directory walk.
        index_files = list(self._iter_index_files(base_path))

        # NOTE(review): ``self.vcs_response`` is forwarded as the ``repo``
        # argument; confirm that the callee expects this object rather than
        # a Git repository object.
        pending = None
        for file_path in index_files:
            packages = self._load_index_entries(file_path)
            if not packages:
                continue
            # One-item look-ahead: the previous batch is known not to be last.
            if pending is not None:
                cargo.collect_packages_from_cargo(pending, self.vcs_response, False)
            pending = packages

        if pending is not None:
            cargo.collect_packages_from_cargo(pending, self.vcs_response, True)
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import textwrap
10+
from pathlib import Path
11+
import saneyaml
12+
from aboutcode import hashid
13+
14+
# Module-level values interpolated into the commit message and used as the
# push target by write_purls_to_repo.
# NOTE(review): VERSION is assigned the string "ALLOWED_HOST", which looks like
# a copy-paste slip, and the host/author values read like setting names rather
# than real values — confirm the intended sources before relying on them.
ALLOWED_HOST = "ALLOWED_HOST"
15+
VERSION = "ALLOWED_HOST"
16+
author_name = "FEDERATEDCODE_GIT_SERVICE_NAME"
17+
author_email = "FEDERATEDCODE_GIT_SERVICE_EMAIL"
18+
remote_name = "origin"
19+
20+
21+
def write_purls_to_repo(repo, package, packages_yaml, push_commit=False):
    """
    Write or update the purls file of ``package`` in ``repo`` and optionally
    commit and push the change.

    ``repo`` is used through ``working_dir``, ``index``, ``active_branch`` and
    ``git``. ``packages_yaml`` is the data dumped to the purls file. When
    ``push_commit`` is true, the index is committed and the active branch is
    pushed to the ``remote_name`` remote.
    """
    ppath = hashid.get_package_purls_yml_file_path(package)

    # Decide between "Add" and "Update" BEFORE writing: once the file has been
    # written and staged it can no longer be told apart from an existing one.
    # The first path segment is dropped, mirroring add_purl_result.
    target = Path(repo.working_dir).joinpath(*Path(ppath).parts[1:])
    change_type = "Update" if target.exists() else "Add"

    add_purl_result(packages_yaml, repo, ppath)

    if not push_commit:
        return

    # A blank line separates the subject from the body so that Git does not
    # fold everything into the subject, and the Signed-off-by line sits in its
    # own final paragraph so that it is recognized as a trailer.
    commit_message = f"""\
        {change_type} list of available {package} versions

        Tool: pkg:github/aboutcode-org/purldb@v{VERSION}
        Reference: https://{ALLOWED_HOST}/

        Signed-off-by: {author_name} <{author_email}>
        """

    default_branch = repo.active_branch.name
    repo.index.commit(textwrap.dedent(commit_message))
    repo.git.push(remote_name, default_branch, "--no-verify")
39+
40+
41+
def add_purl_result(purls, repo, purls_file):
    """
    Write ``purls`` as YAML inside the working tree of ``repo`` and stage the
    file.

    ``purls_file`` is a path whose first segment is dropped to obtain the
    location relative to the repository root. Return that relative path.
    """
    relative_purl_file_path = Path(*Path(purls_file).parts[1:])

    write_to = Path(repo.working_dir) / relative_purl_file_path
    write_to.parent.mkdir(parents=True, exist_ok=True)

    # Write to the location inside the working tree: this is the file staged
    # below. Opening ``purls_file`` itself would create a file relative to the
    # current directory, outside of the repository.
    with open(write_to, encoding="utf-8", mode="w") as f:
        f.write(saneyaml.dump(purls))

    repo.index.add([relative_purl_file_path])
    return relative_purl_file_path

minecode_pipeline/pipes/cargo.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from packageurl import PackageURL
2+
from aboutcode.hashid import get_core_purl
3+
from minecode_pipeline.pipes import write_purls_to_repo
4+
5+
6+
def collect_packages_from_cargo(packages, repo, push_commit=False):
    """
    Collect Cargo package versions into purls and write them to the repo.

    ``packages`` is a non-empty list of mappings, each read through its
    "name" and "vers" keys. The first mapping determines the base purl under
    which all version purls are stored. ``repo`` and ``push_commit`` are
    forwarded unchanged to ``write_purls_to_repo``.

    Raise a ValueError if ``packages`` is empty.
    """
    if not packages:
        raise ValueError("No packages found")

    first_pkg = packages[0]
    base_purl = get_core_purl(
        PackageURL(
            type="cargo",
            name=first_pkg.get("name"),
            version=first_pkg.get("vers"),
        )
    )

    updated_purls = [
        PackageURL(
            type="cargo",
            name=package.get("name"),
            version=package.get("vers"),
        ).to_string()
        for package in packages
    ]

    # Store the purl strings built above, not the raw index mappings.
    write_purls_to_repo(repo, base_purl, updated_purls, push_commit)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,4 @@ console_scripts =
100100
scancodeio_pipelines =
101101
matching = matchcode_pipeline.pipelines.matching:Matching
102102
d2d = scanpipe.pipelines.deploy_to_develop:DeployToDevelop
103+
mine_cargo = minecode_pipeline.pipelines.mine_cargo:MineCargo

0 commit comments

Comments
 (0)