Skip to content

Commit e5ed41a

Browse files
committed
Merge branch 'main' into 660-purl-next-maven
2 parents 0d441f9 + fba0889 commit e5ed41a

38 files changed

+12722
-135
lines changed

minecode/collectors/bitbucket.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
12+
import requests
13+
14+
15+
"""
16+
Collect bitbucket packages from bitbucket registries.
17+
"""
18+
19+
logger = logging.getLogger(__name__)
20+
handler = logging.StreamHandler()
21+
logger.addHandler(handler)
22+
logger.setLevel(logging.INFO)
23+
24+
25+
def bitbucket_get_all_package_version_author(subset_path):
    """
    Return a list of (version, author display name) tuples built from the
    tags of the Bitbucket repository at `subset_path` (presumably of the
    form "workspace/repo_slug"; confirm against callers).

    Every page of the `refs/tags` endpoint is visited by following the
    `next` link of each response. A tag is reported only when its target has
    an `author` entry whose type is "author"; the display name is None when
    that author has no linked user.

    Return None, after logging the error, when any page request fails with
    an HTTP error status. Tags collected from earlier pages are discarded in
    that case.
    """
    repo_tags = f"https://api.bitbucket.org/2.0/repositories/{subset_path}/refs/tags"
    version_author_list = []
    try:
        while repo_tags:
            response = requests.get(repo_tags)
            response.raise_for_status()
            data = response.json()
            # Iterate "values" directly instead of testing data["size"]:
            # Bitbucket documents "size" as an optional element of paginated
            # responses, so indexing it could raise KeyError on a valid page.
            for item in data.get("values") or []:
                version = item.get("name")
                target = item.get("target") or {}
                author = target.get("author") or {}
                if author.get("type") == "author":
                    user = author.get("user") or {}
                    author_display_name = user.get("display_name")
                    version_author_list.append((version, author_display_name))
            # Handle pagination: the last page has no "next" link.
            repo_tags = data.get("next")
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
        return None
    return version_author_list

minecode/collectors/composer.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
import requests
12+
from packageurl import PackageURL
13+
from minecode.miners.composer import build_packages
14+
15+
from minecode import priority_router
16+
from packagedb.models import PackageContentType
17+
18+
logger = logging.getLogger(__name__)
19+
handler = logging.StreamHandler()
20+
logger.addHandler(handler)
21+
logger.setLevel(logging.INFO)
22+
23+
24+
def get_composer_package_json(name):
    """
    Fetch the Packagist "p2" metadata document for the package `name` and
    return its decoded JSON content.
    Example: https://repo.packagist.org/p2/laravel/laravel.json

    Log the error and return None when the request ends with an HTTP error
    status.
    """
    try:
        reply = requests.get(f"https://repo.packagist.org/p2/{name}.json")
        reply.raise_for_status()
    except requests.exceptions.HTTPError as http_error:
        logger.error(f"HTTP error occurred: {http_error}")
        return None
    return reply.json()
37+
38+
39+
def map_composer_package(package_url, pipelines, priority=0):
    """
    Look up the composer `package_url` on Packagist and store every package
    built from its metadata in the PackageDB, queueing each stored package
    for scanning with `pipelines` at `priority`.

    Return an error message when the package cannot be found or when storing
    a package fails (processing stops at the first failure), None otherwise.
    """
    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package

    vendor = package_url.namespace
    project = package_url.name

    # Packagist names are "vendor/project"; fall back to the bare name when
    # the purl has no namespace.
    if vendor:
        packagist_name = f"{vendor}/{project}"
    else:
        packagist_name = project

    metadata = get_composer_package_json(name=packagist_name)
    if not metadata:
        error = f"Package does not exist on packagist.org: {package_url}"
        logger.error(error)
        return error

    error = None
    for package in build_packages(metadata, package_url):
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            break

        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
70+
71+
72+
@priority_router.route("pkg:composer/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for the composer Package URL string
    `purl_str`.

    The optional `addon_pipelines` keyword is appended to the default
    pipelines and the optional `priority` keyword (default 0) is forwarded
    to the scan queue. Return the error message from the mapping step, or
    None when there is none.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = kwargs.get("addon_pipelines", [])
    all_pipelines = DEFAULT_PIPELINES + tuple(extra_pipelines)
    scan_priority = kwargs.get("priority", 0)

    purl = PackageURL.from_string(purl_str)

    outcome = map_composer_package(purl, all_pipelines, scan_priority)
    return outcome or None

minecode/collectors/cpan.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
import requests
12+
from packageurl import PackageURL
13+
14+
from minecode import priority_router
15+
from packagedb.models import PackageContentType
16+
17+
logger = logging.getLogger(__name__)
18+
handler = logging.StreamHandler()
19+
logger.addHandler(handler)
20+
logger.setLevel(logging.INFO)
21+
22+
23+
def get_cpan_release_json(distribution, version):
    """
    Search MetaCPAN for the release matching `distribution` and `version`
    and return the `_source` document of the first hit.

    Example:
    https://fastapi.metacpan.org/v1/release/_search?q=distribution:Mojolicious%20AND%20version:9.22

    Return None when the search has no hit, or when the request ends with an
    HTTP error status (the error is logged).
    """
    search_url = (
        "https://fastapi.metacpan.org/v1/release/_search?"
        f"q=distribution:{distribution}%20AND%20version:{version}"
    )

    try:
        reply = requests.get(search_url)
        reply.raise_for_status()
    except requests.exceptions.HTTPError as http_error:
        logger.error(f"HTTP error occurred: {http_error}")
        return None

    # The matching documents are nested under hits -> hits in the response.
    matches = reply.json().get("hits", {}).get("hits", [])
    return matches[0].get("_source") if matches else None
47+
48+
49+
def map_cpan_package(package_url, pipelines, priority=0):
    """
    Look up the CPAN distribution `package_url` (name and version) on
    MetaCPAN and store every package built from the release data in the
    PackageDB, queueing each stored package for scanning with `pipelines`
    at `priority`.

    Return an error message when the release cannot be found or when storing
    a package fails (processing stops at the first failure), None otherwise.
    """
    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package
    from minecode.miners.cpan import build_packages

    distribution = package_url.name
    release_version = package_url.version
    release_data = get_cpan_release_json(distribution, release_version)

    if not release_data:
        error = f"Distribution does not exist on CPAN: {package_url}"
        logger.error(error)
        return error

    error = None
    for package in build_packages(release_data, package_url):
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            break

        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
78+
79+
80+
@priority_router.route("pkg:cpan/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for the CPAN Package URL string `purl_str`.

    The optional `addon_pipelines` keyword is appended to the default
    pipelines and the optional `priority` keyword (default 0) is forwarded
    to the scan queue. Return the error message from the mapping step, or
    None when there is none.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = kwargs.get("addon_pipelines", [])
    all_pipelines = DEFAULT_PIPELINES + tuple(extra_pipelines)
    scan_priority = kwargs.get("priority", 0)

    purl = PackageURL.from_string(purl_str)

    outcome = map_cpan_package(purl, all_pipelines, scan_priority)
    return outcome or None

minecode/collectors/cran.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
import requests
12+
from packageurl import PackageURL
13+
14+
from minecode import priority_router
15+
from packagedb.models import PackageContentType
16+
17+
logger = logging.getLogger(__name__)
18+
handler = logging.StreamHandler()
19+
logger.addHandler(handler)
20+
logger.setLevel(logging.INFO)
21+
22+
23+
def get_cran_package_json(name):
    """
    Fetch the CRAN DB API document covering all versions of the package
    `name` and return its decoded JSON content.
    Example: https://crandb.r-pkg.org/dplyr/all

    Log the error and return None when the request ends with an HTTP error
    status.
    """
    try:
        reply = requests.get(f"https://crandb.r-pkg.org/{name}/all")
        reply.raise_for_status()
    except requests.exceptions.HTTPError as http_error:
        logger.error(f"HTTP error occurred: {http_error}")
        return None
    return reply.json()
36+
37+
38+
def map_cran_package(package_url, pipelines, priority=0):
    """
    Look up the CRAN `package_url` through the CRAN DB API and store every
    package built from its metadata in the PackageDB, queueing each stored
    package for scanning with `pipelines` at `priority`.

    Return an error message when the package cannot be found or when storing
    a package fails (processing stops at the first failure), None otherwise.
    """
    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package
    from minecode.miners.cran import build_packages

    metadata = get_cran_package_json(package_url.name)

    if not metadata:
        error = f"Package does not exist on CRAN: {package_url}"
        logger.error(error)
        return error

    error = None
    for package in build_packages(metadata, package_url):
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            break

        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
66+
67+
68+
@priority_router.route("pkg:cran/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for the CRAN Package URL string `purl_str`.

    The optional `addon_pipelines` keyword is appended to the default
    pipelines and the optional `priority` keyword (default 0) is forwarded
    to the scan queue. Return the error message from the mapping step, or
    None when there is none.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = kwargs.get("addon_pipelines", [])
    all_pipelines = DEFAULT_PIPELINES + tuple(extra_pipelines)
    scan_priority = kwargs.get("priority", 0)

    purl = PackageURL.from_string(purl_str)

    outcome = map_cran_package(purl, all_pipelines, scan_priority)
    return outcome or None

minecode/collectors/generic.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def packagedata_from_dict(package_data):
100100
return PackageData.from_data(cleaned_package_data)
101101

102102

103-
def map_fetchcode_supported_package(package_url, pipelines, priority=0):
103+
def map_fetchcode_supported_package(package_url, pipelines, priority=0, from_go_lang=False):
104104
"""
105105
Add a `package_url` supported by fetchcode to the PackageDB.
106106
@@ -109,13 +109,24 @@ def map_fetchcode_supported_package(package_url, pipelines, priority=0):
109109
from minecode.model_utils import add_package_to_scan_queue
110110
from minecode.model_utils import merge_or_create_package
111111

112-
packages = [p for p in info(str(package_url)) or []]
112+
try:
113+
packages = []
114+
packages = [p for p in info(str(package_url)) or []]
115+
except Exception as e:
116+
print(str(e))
113117

114118
if not packages:
115-
error = f"Could not find package using fetchcode: {package_url}"
119+
if from_go_lang:
120+
purl = "pkg:golang/" + str(package_url).partition("pkg:")[2]
121+
else:
122+
purl = str(package_url)
123+
error = f"Could not find package using fetchcode: {purl}"
116124
logger.error(error)
117125
return error
118126

127+
if from_go_lang:
128+
packages[0].type = "golang"
129+
packages[0].namespace = "github.com/" + packages[0].namespace
119130
package_data = packages[0].to_dict()
120131

121132
# Remove obsolete Package fields see https://github.com/aboutcode-org/fetchcode/issues/108

minecode/collectors/github.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,42 @@
1313
from minecode.collectors.generic import map_fetchcode_supported_package
1414

1515

16+
def github_get_all_versions(subset_path):
    """
    Return the list of tag names of the GitHub repository `subset_path`,
    as reported by the GitHub tags API.

    Pages of up to 100 tags are requested one after the other until a page
    is empty or the response carries no "next" link. Tags without a name
    are skipped. An HTTP error status raises through `raise_for_status()`.
    """
    import requests

    tags_url = f"https://api.github.com/repos/{subset_path}/tags"
    request_headers = {"Accept": "application/vnd.github.v3+json"}
    versions = []
    page_number = 1
    has_more = True

    while has_more:
        reply = requests.get(
            tags_url,
            # 100 is the largest page size requested from the API.
            params={"page": page_number, "per_page": 100},
            headers=request_headers,
        )
        reply.raise_for_status()

        tags = reply.json()
        if not tags:
            break

        tag_names = (tag.get("name") for tag in tags)
        versions.extend(tag_name for tag_name in tag_names if tag_name)
        page_number += 1

        # Stop once the response no longer advertises a following page.
        has_more = "next" in reply.links

    return versions
50+
51+
1652
# Indexing GitHub PURLs requires a GitHub API token.
1753
# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`.
1854
@priority_router.route("pkg:github/.*")

0 commit comments

Comments
 (0)