Skip to content

Commit edff9e1

Browse files
authored
add on demand package data collection for golang, gitlab and bitbucket #596 (#608)
1 parent 4c463f2 commit edff9e1

File tree

19 files changed

+1123
-9
lines changed

19 files changed

+1123
-9
lines changed

minecode/collectors/bitbucket.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
12+
import requests
13+
14+
15+
"""
16+
Collect bitbucket packages from bitbucket registries.
17+
"""
18+
19+
logger = logging.getLogger(__name__)
20+
handler = logging.StreamHandler()
21+
logger.addHandler(handler)
22+
logger.setLevel(logging.INFO)
23+
24+
25+
def bitbucket_get_all_package_version_author(subset_path):
    """
    Return a list of ``(version, author_display_name)`` tuples, one per tag of
    the Bitbucket repository identified by ``subset_path``.

    ``subset_path`` is interpolated as-is into the Bitbucket 2.0
    ``repositories/<subset_path>/refs/tags`` URL. Every page of results is
    read by following the ``next`` link of each response until it is absent.

    Only tags whose target carries an author entry of type "author" are
    reported; the display name is None when that entry has no ``user``.

    Return None, after logging the error, when a request fails.
    """
    repo_tags = f"https://api.bitbucket.org/2.0/repositories/{subset_path}/refs/tags"
    version_author_list = []
    try:
        while repo_tags:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(repo_tags, timeout=30)
            response.raise_for_status()
            data = response.json()
            # Iterate "values" directly instead of gating on data["size"]:
            # "size" is not guaranteed to be present in a page, and an empty
            # or missing "values" already yields no iterations.
            for item in data.get("values") or []:
                version = item.get("name")
                target = item.get("target") or {}
                author = target.get("author") or {}
                if author.get("type") == "author":
                    user = author.get("user") or {}
                    author_display_name = user.get("display_name")
                    version_author_list.append((version, author_display_name))
            # Handle pagination: a missing "next" ends the loop.
            repo_tags = data.get("next")
    except requests.exceptions.HTTPError as err:
        logger.error("HTTP error occurred: %s", err)
        return None
    except requests.exceptions.RequestException as err:
        # Timeouts and connection failures follow the same log-and-return-None
        # path as HTTP errors instead of escaping to the caller.
        logger.error("Request error occurred: %s", err)
        return None
    return version_author_list

minecode/collectors/generic.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def packagedata_from_dict(package_data):
100100
return PackageData.from_data(cleaned_package_data)
101101

102102

103-
def map_fetchcode_supported_package(package_url, pipelines, priority=0):
103+
def map_fetchcode_supported_package(package_url, pipelines, priority=0, from_go_lang=False):
104104
"""
105105
Add a `package_url` supported by fetchcode to the PackageDB.
106106
@@ -109,13 +109,24 @@ def map_fetchcode_supported_package(package_url, pipelines, priority=0):
109109
from minecode.model_utils import add_package_to_scan_queue
110110
from minecode.model_utils import merge_or_create_package
111111

112-
packages = [p for p in info(str(package_url)) or []]
112+
try:
113+
packages = []
114+
packages = [p for p in info(str(package_url)) or []]
115+
except Exception as e:
116+
print(str(e))
113117

114118
if not packages:
115-
error = f"Could not find package using fetchcode: {package_url}"
119+
if from_go_lang:
120+
purl = "pkg:golang/" + str(package_url).partition("pkg:")[2]
121+
else:
122+
purl = str(package_url)
123+
error = f"Could not find package using fetchcode: {purl}"
116124
logger.error(error)
117125
return error
118126

127+
if from_go_lang:
128+
packages[0].type = "golang"
129+
packages[0].namespace = "github.com/" + packages[0].namespace
119130
package_data = packages[0].to_dict()
120131

121132
# Remove obsolete Package fields see https://github.com/aboutcode-org/fetchcode/issues/108

minecode/collectors/github.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,42 @@
1313
from minecode.collectors.generic import map_fetchcode_supported_package
1414

1515

16+
def github_get_all_versions(subset_path):
    """
    Return a list of all tag names of the GitHub repository ``subset_path``.

    ``subset_path`` is interpolated as-is into the GitHub
    ``repos/<subset_path>/tags`` API URL. Tags are requested 100 per page and
    pages are read until an empty page is returned or the response carries no
    ``next`` link. Tags without a name are skipped.

    Raise the ``requests`` exception (HTTP error, timeout, connection error)
    of the first request that fails.
    """
    import requests

    url = f"https://api.github.com/repos/{subset_path}/tags"
    version_list = []
    page = 1

    while True:
        response = requests.get(
            url,
            params={"page": page, "per_page": 100},  # Max 100 per page
            headers={"Accept": "application/vnd.github.v3+json"},
            # Without a timeout a stalled connection would block forever.
            timeout=30,
        )
        response.raise_for_status()

        data = response.json()
        if not data:
            break

        version_list.extend(tag["name"] for tag in data if tag.get("name"))

        # Stop once the Link header no longer advertises a next page.
        if "next" not in response.links:
            break
        page += 1

    return version_list
50+
51+
1652
# Indexing GitHub PURLs requires a GitHub API token.
1753
# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`.
1854
@priority_router.route("pkg:github/.*")

minecode/collectors/gitlab.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
12+
import requests
13+
14+
15+
"""
16+
Collect gitlab packages from gitlab registries.
17+
"""
18+
19+
logger = logging.getLogger(__name__)
20+
handler = logging.StreamHandler()
21+
logger.addHandler(handler)
22+
logger.setLevel(logging.INFO)
23+
24+
25+
def gitlab_get_all_package_version_author(subset_path):
    """
    Return a list of ``(version, author, author_email)`` tuples, one per tag
    of the GitLab project identified by ``subset_path``.

    ``subset_path`` is interpolated as-is into the gitlab.com
    ``projects/<subset_path>/repository/tags`` API URL.
    NOTE(review): the GitLab API expects a numeric project id or a URL-encoded
    path here; presumably the caller has already encoded it -- confirm.

    Every page of results is read by following the ``next`` relation of the
    response ``Link`` header, so repositories with more tags than fit in one
    page are fully listed. The author name and email come from the tag commit
    and default to an empty string when absent.

    Return None, after logging the error, when a request fails.
    """
    repo_tags = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/tags"
    # Only the first request needs the page size: the "next" links returned
    # by the API already carry the pagination query parameters.
    params = {"per_page": 100}
    version_author_list = []
    try:
        while repo_tags:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(repo_tags, params=params, timeout=30)
            response.raise_for_status()
            # Get all available versions of this page
            for item in response.json():
                version = item.get("name")
                commit = item.get("commit") or {}
                author = commit.get("author_name") or ""
                author_email = commit.get("author_email") or ""
                version_author_list.append((version, author, author_email))
            # Handle pagination: no "next" relation ends the loop.
            repo_tags = response.links.get("next", {}).get("url")
            params = None
    except requests.exceptions.HTTPError as err:
        logger.error("HTTP error occurred: %s", err)
        return None
    except requests.exceptions.RequestException as err:
        # Timeouts and connection failures follow the same log-and-return-None
        # path as HTTP errors instead of escaping to the caller.
        logger.error("Request error occurred: %s", err)
        return None
    return version_author_list

0 commit comments

Comments
 (0)