Skip to content

Commit 76e1daa

Browse files
committed
Use matchcode-toolkit branch
* There is a bug in the code stemming function.
Signed-off-by: Jono Yang <[email protected]>
1 parent 2b4d811 commit 76e1daa

File tree

3 files changed

+37
-1
lines changed

3 files changed

+37
-1
lines changed

scanpipe/pipes/matchcode.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import requests
2929
from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints
3030
from matchcode_toolkit.fingerprinting import get_file_fingerprint_hashes
31+
from matchcode_toolkit.fingerprinting import get_line_by_pos
3132
from matchcode_toolkit.fingerprinting import get_stemmed_file_fingerprint_hashes
3233
from scancode import Scanner
3334

@@ -405,3 +406,14 @@ def create_packages_from_match_results(project, match_results):
405406
package_data=matched_package,
406407
status=flag.MATCHED_TO_PURLDB_PACKAGE,
407408
)
409+
match_resources = match_results.get("files", [])
410+
for match_resource in match_resources:
411+
match_resource_extra_data = match_resource["extra_data"]
412+
if match_resource_extra_data:
413+
resource = project.codebaseresources.get(path=match_resource["path"])
414+
# compute line_by_pos for displaying matches in CodebaseResource detail view
415+
with open(resource.location) as f:
416+
content = f.read()
417+
line_by_pos = get_line_by_pos(content)
418+
match_resource_extra_data["line_by_pos"] = line_by_pos
419+
resource.update_extra_data(match_resource_extra_data)

scanpipe/views.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
import saneyaml
6262
import xlsxwriter
6363
from django_filters.views import FilterView
64+
from licensedcode.spans import Span
6465
from packageurl.contrib.django.models import PACKAGE_URL_FIELDS
6566

6667
from scancodeio.auth import ConditionalLoginRequired
@@ -1948,6 +1949,26 @@ def get_license_annotations(self, field_name):
19481949

19491950
return annotations
19501951

1952+
def get_matched_snippet_annotations(self, resource):
    """
    Return a list of line-range annotations for the snippets matched in
    ``resource``.

    The ``matched_snippets`` and ``line_by_pos`` entries are read from
    ``resource.extra_data``. Each returned annotation is a mapping with a
    ``start_line`` and an ``end_line`` key, one per sub-span of the matched
    positions of a snippet.

    Return an empty list when either entry is missing or empty, so that the
    view can still be rendered for resources without snippet matches or for
    resources whose ``line_by_pos`` mapping was never stored.
    """
    extra_data = resource.extra_data
    matched_snippets = extra_data.get("matched_snippets")
    line_by_pos = extra_data.get("line_by_pos")
    if not matched_snippets or not line_by_pos:
        return []

    matched_snippet_annotations = []
    for matched_snippet in matched_snippets:
        match_detections = matched_snippet.get("match_detections")
        if not match_detections:
            continue

        # Convert the qspan from a list of ints to a Span so that it can be
        # split in sub-spans, each one becoming its own annotation.
        qspan = Span(match_detections)
        for span in qspan.subspans():
            # line_by_pos is stored as JSON and keys in JSON are always
            # strings.
            start_line = line_by_pos.get(str(span.start))
            end_line = line_by_pos.get(str(span.end))
            if start_line is None or end_line is None:
                # Skip positions that are not in the stored mapping rather
                # than failing the whole detail view.
                continue
            matched_snippet_annotations.append(
                {
                    "start_line": start_line,
                    "end_line": end_line,
                }
            )
    return matched_snippet_annotations
1971+
19511972
def get_context_data(self, **kwargs):
19521973
context = super().get_context_data(**kwargs)
19531974
resource = self.object
@@ -1964,6 +1985,9 @@ def get_context_data(self, **kwargs):
19641985
"licenses": license_annotations,
19651986
}
19661987

1988+
matched_snippet_annotations = self.get_matched_snippet_annotations(resource)
1989+
context["detected_values"]["matched_snippets"] = matched_snippet_annotations
1990+
19671991
fields = [
19681992
("copyrights", "copyright"),
19691993
("holders", "holder"),

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ install_requires =
9797
cyclonedx-python-lib==9.1.0
9898
jsonschema==4.23.0
9999
# MatchCode-toolkit
100-
matchcode-toolkit==7.2.1
100+
matchcode-toolkit @ git+https://github.com/aboutcode-org/matchcode-toolkit.git@bf9fd9afd6ce1dd25dce4935135ea4118fc80b06
101101
# Univers
102102
univers==30.12.1
103103
# Markdown

0 commit comments

Comments
 (0)