Skip to content

Commit 8d37d77

Browse files
authored
Add hackage collector for on demand collection (#716)
* Add hackage collector for on demand collection Signed-off-by: Tushar Goel <[email protected]> * Fix codestyle error Signed-off-by: Tushar Goel <[email protected]> --------- Signed-off-by: Tushar Goel <[email protected]>
1 parent bd17e10 commit 8d37d77

File tree

2 files changed

+145
-0
lines changed

2 files changed

+145
-0
lines changed

minecode/collectors/hackage.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
from packageurl import PackageURL
12+
import requests
13+
from minecode import priority_router
14+
15+
from packagedb.models import PackageContentType
16+
from packagedcode import models as scan_models
17+
18+
# Module-level logger for this collector. A stream handler is attached and the
# level is set to INFO at import time so that messages logged by the functions
# below are emitted.
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
logger.setLevel(logging.INFO)
logger.addHandler(handler)
22+
23+
24+
def get_hackage_package_json(name, timeout=30):
    """
    Return the parsed contents of the JSON file of the package `name` from
    Hackage, or None if it could not be fetched or decoded.

    The data is requested from https://hackage.haskell.org/package/{name}.json
    Example: https://hackage.haskell.org/package/ac-halfinteger.json

    `timeout` is the number of seconds to wait for Hackage before giving up.
    Without a timeout, requests waits indefinitely on a stalled connection.

    Failures (connection errors, timeouts, HTTP error statuses, invalid JSON)
    are logged and reported to the caller as a None return value rather than
    raised.
    """
    url = f"https://hackage.haskell.org/package/{name}.json"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as err:
        # ValueError covers JSON decoding failures on requests releases where
        # the decode error is not a RequestException subclass.
        logger.error(f"Error fetching package data from Hackage: {err}")
        return None
38+
39+
40+
def map_hackage_package(package_url, pipelines, priority=0):
    """
    Add a hackage `package_url` to the PackageDB.

    The name and version are read from `package_url`. The version must be
    listed in the data returned by `get_hackage_package_json` for that name;
    if it is, a source-archive package record is created or merged and queued
    for scanning with the given `pipelines` and `priority`.

    Return an error message string when the package data cannot be fetched or
    the version is not listed. Otherwise return the `error` value produced by
    `merge_or_create_package`.
    """
    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package

    name = package_url.name
    version = package_url.version

    versions = get_hackage_package_json(name=name)
    if versions is None:
        # The fetch helper returns None on failure; a membership test on None
        # would raise TypeError, so report the failure as an error message.
        error = f"Unable to fetch package data for {name} from hackage"
        logger.error(error)
        return error

    if version not in versions:
        error = f"Version {version} not found for {name} on hackage"
        logger.error(error)
        return error

    download_url = f"https://hackage.haskell.org/package/{name}-{version}/{name}-{version}.tar.gz"
    homepage_url = f"https://hackage.haskell.org/package/{name}-{version}"

    package = scan_models.Package(
        type="hackage",
        name=name,
        version=version,
        download_url=download_url,
        homepage_url=homepage_url,
        primary_language="haskell",
    )

    package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
    db_package, _, _, error = merge_or_create_package(package, visit_level=0)

    # Only queue a scan when a database package was actually returned.
    if db_package:
        add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
74+
75+
76+
@priority_router.route("pkg:hackage/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for the Hackage Package URL string `purl_str`.

    The optional `addon_pipelines` keyword is appended to DEFAULT_PIPELINES and
    the optional `priority` keyword (default 0) is forwarded to
    `map_hackage_package`.

    Return the error message from `map_hackage_package` if there is one,
    otherwise None.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = kwargs.get("addon_pipelines", [])
    pipelines = DEFAULT_PIPELINES + tuple(extra_pipelines)
    scan_priority = kwargs.get("priority", 0)

    purl = PackageURL.from_string(purl_str)
    outcome = map_hackage_package(purl, pipelines, scan_priority)

    # A falsy outcome means nothing went wrong; report that as None.
    return outcome or None
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import pytest
11+
from unittest.mock import patch
12+
13+
from packageurl import PackageURL
14+
import requests
15+
16+
import minecode.collectors.hackage as hackage
17+
18+
19+
@pytest.fixture
def package_url():
    """
    Return the PackageURL shared by the tests in this module.

    The name `ac-halfinteger` and version `1.2.1` match the values the tests
    assert on and list in their mocked version data.
    """
    return PackageURL.from_string("pkg:hackage/ac-halfinteger@1.2.1")
22+
23+
24+
def test_map_hackage_package_success(package_url):
    """
    A version listed in the Hackage data is merged into the database and
    queued for scanning, and no error is returned.
    """
    fetch_patch = patch("minecode.collectors.hackage.get_hackage_package_json")
    merge_patch = patch("minecode.model_utils.merge_or_create_package")
    queue_patch = patch("minecode.model_utils.add_package_to_scan_queue")

    with fetch_patch as fake_fetch, merge_patch as fake_merge, queue_patch as fake_queue:
        fake_fetch.return_value = {"1.1.1": "normal", "1.2.1": "normal"}
        fake_merge.return_value = ("db_package", None, None, None)

        result = hackage.map_hackage_package(package_url, pipelines=["p1"], priority=1)

        assert result is None
        fake_fetch.assert_called_once_with(name="ac-halfinteger")
        fake_merge.assert_called_once()
        fake_queue.assert_called_once_with(package="db_package", pipelines=["p1"], priority=1)
39+
40+
41+
def test_map_hackage_package_version_not_found(package_url):
    """
    A version missing from the Hackage data yields a "not found" error message.
    """
    target = "minecode.collectors.hackage.get_hackage_package_json"

    with patch(target, return_value={"2.0.0": "normal"}):
        message = hackage.map_hackage_package(package_url, pipelines=[])

    assert "not found" in message
47+
48+
49+
def test_map_hackage_package_network_error(package_url):
    """
    A request failure while fetching the Hackage data is reported as None.
    """
    failure = requests.RequestException("Network down")

    with patch("minecode.collectors.hackage.requests.get", side_effect=failure):
        result = hackage.get_hackage_package_json(package_url.name)

    assert result is None

0 commit comments

Comments
 (0)