Skip to content

Commit 0b5ba7c

Browse files
authored
Move the user id from project name to a label #387 (#426)
Signed-off-by: tdruez <[email protected]>
1 parent 4c82a1d commit 0b5ba7c

File tree

4 files changed

+52
-72
lines changed

4 files changed

+52
-72
lines changed

component_catalog/tests/test_scancodeio.py

Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717
from component_catalog.models import Package
1818
from component_catalog.tests import make_package
1919
from dejacode_toolkit.scancodeio import ScanCodeIO
20-
from dejacode_toolkit.scancodeio import check_for_existing_scan_workaround
2120
from dejacode_toolkit.scancodeio import get_hash_uid
2221
from dejacode_toolkit.scancodeio import get_notice_text_from_key_files
22+
from dejacode_toolkit.scancodeio import update_package_from_existing_scan_data
2323
from dje.models import Dataspace
2424
from dje.models import History
2525
from dje.tasks import scancodeio_submit_scan
@@ -67,41 +67,38 @@ def test_scancodeio_submit_scan_task(self, mock_submit_scan, mock_request_head):
6767

6868
expected = [
6969
mock.call("http://okurl.com", user_uuid, dataspace_uuid),
70-
mock.call().__bool__(),
7170
mock.call("https://okurl2.com", user_uuid, dataspace_uuid),
72-
mock.call().__bool__(),
7371
]
7472
self.assertEqual(expected, mock_submit_scan.mock_calls)
7573

7674
@mock.patch("requests.sessions.Session.get")
7775
def test_scancodeio_fetch_scan_list(self, mock_session_get):
7876
scancodeio = ScanCodeIO(self.dataspace)
79-
self.assertIsNone(scancodeio.fetch_scan_list())
80-
self.assertFalse(mock_session_get.called)
77+
dataspace_uid = get_hash_uid(self.dataspace.uuid)
78+
user_uid = get_hash_uid(self.basic_user.uuid)
8179

82-
scancodeio.fetch_scan_list(user=self.basic_user)
80+
scancodeio.fetch_scan_list()
8381
params = mock_session_get.call_args.kwargs["params"]
84-
expected = {"format": "json", "name__endswith": get_hash_uid(self.basic_user.uuid)}
82+
expected = {
83+
"format": "json",
84+
"name__contains": dataspace_uid,
85+
}
8586
self.assertEqual(expected, params)
8687

87-
scancodeio.fetch_scan_list(dataspace=self.basic_user.dataspace)
88+
scancodeio.fetch_scan_list(user=self.basic_user)
8889
params = mock_session_get.call_args.kwargs["params"]
8990
expected = {
9091
"format": "json",
91-
"name__contains": get_hash_uid(self.basic_user.dataspace.uuid),
92+
"name__contains": dataspace_uid,
93+
"label": user_uid,
9294
}
9395
self.assertEqual(expected, params)
9496

95-
scancodeio.fetch_scan_list(
96-
user=self.basic_user,
97-
dataspace=self.basic_user.dataspace,
98-
extra_params="extra",
99-
)
97+
scancodeio.fetch_scan_list(extra_params="extra")
10098
params = mock_session_get.call_args.kwargs["params"]
10199
expected = {
102100
"format": "json",
103-
"name__contains": get_hash_uid(self.basic_user.dataspace.uuid),
104-
"name__endswith": get_hash_uid(self.basic_user.uuid),
101+
"name__contains": get_hash_uid(self.dataspace.uuid),
105102
"extra_params": "extra",
106103
}
107104
self.assertEqual(expected, params)
@@ -115,16 +112,11 @@ def test_scancodeio_fetch_scan_info(self, mock_session_get):
115112
params = mock_session_get.call_args.kwargs["params"]
116113
expected = {
117114
"name__startswith": get_hash_uid(uri),
118-
"name__contains": get_hash_uid(self.basic_user.dataspace.uuid),
115+
"name__contains": get_hash_uid(self.dataspace.uuid),
119116
"format": "json",
120117
}
121118
self.assertEqual(expected, params)
122119

123-
scancodeio.fetch_scan_info(uri=uri, user=self.basic_user)
124-
params = mock_session_get.call_args.kwargs["params"]
125-
expected["name__endswith"] = get_hash_uid(self.basic_user.uuid)
126-
self.assertEqual(expected, params)
127-
128120
@mock.patch("dejacode_toolkit.scancodeio.ScanCodeIO.request_get")
129121
def test_scancodeio_find_project(self, mock_request_get):
130122
scancodeio = ScanCodeIO(self.dataspace)
@@ -340,19 +332,13 @@ def test_scancodeio_get_notice_text_from_key_files(self):
340332
self.assertEqual("", notice_text)
341333

342334
@mock.patch("component_catalog.models.Package.update_from_scan")
343-
def test_scancodeio_check_for_existing_scan_workaround(self, mock_update_from_scan):
335+
def test_scancodeio_update_package_from_existing_scan_data(self, mock_update_from_scan):
344336
mock_update_from_scan.return_value = ["updated_field"]
345337
download_url = self.package1.download_url
346338
user = self.basic_user
347339

348-
response_json = None
349-
results = check_for_existing_scan_workaround(response_json, download_url, user)
350-
self.assertIsNone(results)
351-
352-
response_json = {"success": True}
353-
results = check_for_existing_scan_workaround(response_json, download_url, user)
340+
results = update_package_from_existing_scan_data("unknown_url", user)
354341
self.assertIsNone(results)
355342

356-
response_json = {"name": "project with this name already exists."}
357-
results = check_for_existing_scan_workaround(response_json, download_url, user)
343+
results = update_package_from_existing_scan_data(download_url, user)
358344
self.assertEqual(["updated_field"], results)

component_catalog/views.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1742,7 +1742,6 @@ def get_queryset(self):
17421742
scancodeio = ScanCodeIO(dataspace)
17431743
self.list_data = (
17441744
scancodeio.fetch_scan_list(
1745-
dataspace=dataspace,
17461745
user=user if self.request.GET.get("created_by_me") else None,
17471746
**filters,
17481747
)

dejacode_toolkit/scancodeio.py

Lines changed: 24 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -62,16 +62,21 @@ def get_project_info(self, download_url):
6262
return scan_info.get("results")[0]
6363

6464
def submit_scan(self, uri, user_uuid, dataspace_uuid):
65+
"""
66+
Submit package scan request to ScanCode.io.
67+
A unique ID for the user is set as a project label, available for filtering.
68+
"""
69+
webhook_url = get_webhook_url("notifications:send_scan_notification", user_uuid)
70+
6571
data = {
66-
"name": get_project_name(uri, user_uuid, dataspace_uuid),
72+
"name": get_project_name(uri, dataspace_uuid),
6773
"input_urls": uri,
6874
"pipeline": "scan_single_package",
6975
"execute_now": True,
76+
"webhook_url": webhook_url,
77+
"labels": [get_hash_uid(user_uuid)],
7078
}
7179

72-
webhook_url = get_webhook_url("notifications:send_scan_notification", user_uuid)
73-
data["webhook_url"] = webhook_url
74-
7580
logger.debug(f'{self.label}: submit scan uri="{uri}" webhook_url="{webhook_url}"')
7681
return self.request_post(url=self.project_api_url, json=data)
7782

@@ -102,14 +107,11 @@ def start_pipeline(self, run_url):
102107
start_pipeline_url = run_url + "start_pipeline/"
103108
return self.request_post(url=start_pipeline_url)
104109

105-
def fetch_scan_list(self, user=None, dataspace=None, **extra_payload):
106-
payload = {}
107-
108-
if dataspace:
109-
payload["name__contains"] = get_hash_uid(dataspace.uuid)
110+
def fetch_scan_list(self, user=None, **extra_payload):
111+
payload = {"name__contains": get_hash_uid(self.dataspace.uuid)}
110112

111113
if user:
112-
payload["name__endswith"] = get_hash_uid(user.uuid)
114+
payload["label"] = get_hash_uid(user.uuid)
113115

114116
payload.update(extra_payload)
115117
if not payload:
@@ -131,15 +133,11 @@ def find_project(self, **kwargs):
131133
if response.get("count") == 1:
132134
return response.get("results")[0]
133135

134-
def fetch_scan_info(self, uri, user=None):
136+
def fetch_scan_info(self, uri):
135137
payload = {
136138
"name__startswith": get_hash_uid(uri),
137139
"name__contains": get_hash_uid(self.dataspace.uuid),
138140
}
139-
140-
if user:
141-
payload["name__endswith"] = get_hash_uid(user.uuid)
142-
143141
logger.debug(f'{self.label}: fetch scan info uri="{uri}"')
144142
return self.request_get(url=self.project_api_url, params=payload)
145143

@@ -491,18 +489,16 @@ def get_hash_uid(value):
491489
return md5(str(value).encode("utf-8"), usedforsecurity=False).hexdigest()[:10]
492490

493491

494-
def get_project_name(uri, user_uuid, dataspace_uuid):
492+
def get_project_name(uri, dataspace_uuid):
495493
"""
496494
Return a project name based on a hash of the provided `uri` combined with a hash
497-
of the `user_uuid` and `dataspace_uuid`.
495+
of the `dataspace_uuid`.
498496
499-
project_name = "uri_hash.dataspace_uuid_hash.user_uuid_hash"
497+
project_name = "uri_hash.dataspace_uuid_hash"
500498
"""
501499
uri_hash = get_hash_uid(uri)
502500
dataspace_hash = get_hash_uid(dataspace_uuid)
503-
user_hash = get_hash_uid(user_uuid)
504-
505-
return f"{uri_hash}.{dataspace_hash}.{user_hash}"
501+
return f"{uri_hash}.{dataspace_hash}"
506502

507503

508504
def get_webhook_url(view_name, user_uuid):
@@ -560,22 +556,15 @@ def get_notice_text_from_key_files(scan_summary, separator="\n\n---\n\n"):
560556
return notice_text
561557

562558

563-
def check_for_existing_scan_workaround(response_json, uri, user):
559+
def update_package_from_existing_scan_data(uri, user):
564560
"""
565-
Workaroud the case where the Scan already exisit on the ScanCode.io side before
561+
Work around the case where the Scan already exists on the ScanCode.io side before
566562
the package is created on the DejaCode side.
567563
This can happen if the package is deleted then re-created by the same user
568564
providing the same download URL.
569565
"""
570-
if not response_json or not isinstance(response_json, dict):
571-
return
572-
573-
already_exists_message = "project with this name already exists."
574-
already_exists = already_exists_message in response_json.get("name", [])
575-
576-
if already_exists:
577-
Package = apps.get_model("component_catalog", "package")
578-
package = Package.objects.get_or_none(download_url=uri, dataspace=user.dataspace)
579-
if package:
580-
updated_fields = package.update_from_scan(user)
581-
return updated_fields
566+
Package = apps.get_model("component_catalog", "package")
567+
package = Package.objects.get_or_none(download_url=uri, dataspace=user.dataspace)
568+
if package:
569+
updated_fields = package.update_from_scan(user)
570+
return updated_fields

dje/tasks.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from django_rq import job
2121

2222
from dejacode_toolkit.scancodeio import ScanCodeIO
23-
from dejacode_toolkit.scancodeio import check_for_existing_scan_workaround
23+
from dejacode_toolkit.scancodeio import update_package_from_existing_scan_data
2424
from dje.utils import is_available
2525

2626
logger = logging.getLogger(__name__)
@@ -109,11 +109,17 @@ def scancodeio_submit_scan(uris, user_uuid, dataspace_uuid):
109109

110110
scancodeio = ScanCodeIO(user.dataspace)
111111
for uri in uris:
112-
if is_available(uri):
113-
response_json = scancodeio.submit_scan(uri, user_uuid, dataspace_uuid)
114-
check_for_existing_scan_workaround(response_json, uri, user)
115-
else:
112+
if not is_available(uri):
116113
logger.info(f'uri="{uri}" is not reachable.')
114+
continue
115+
116+
# Check if a Scan is already available in ScanCode.io for this URI.
117+
existing_project = scancodeio.get_project_info(download_url=uri)
118+
if existing_project:
119+
logger.info(f'Update the local uri="{uri}" package from available Scan data.')
120+
update_package_from_existing_scan_data(uri, user)
121+
else:
122+
scancodeio.submit_scan(uri, user_uuid, dataspace_uuid)
117123

118124

119125
@job("default", timeout="3h")

0 commit comments

Comments
 (0)