Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ dependencies = [
"gitpython",
"rospkg",
"scancode-toolkit>=32.0.8",
"spdx-tools>=0.7.0rc0"
"spdx-tools>=0.7.0rc0",
"requests",
"jellyfish"
]
requires-python = ">=3.7"

Expand Down
92 changes: 70 additions & 22 deletions src/ros_license_toolkit/license_checks/license_text_exists_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,18 @@
import os
from typing import Any, Dict, Optional

import jellyfish
import requests # type: ignore[import-untyped]

from ros_license_toolkit.checks import Check, Status
from ros_license_toolkit.common import get_spdx_license_name
from ros_license_toolkit.license_tag import LicenseTag, is_license_name_in_spdx_list
from ros_license_toolkit.package import Package
from ros_license_toolkit.ui_elements import red

# Minimum similarity between a license text file and the SPDX reference text for it to be accepted
SIMILARITY_THRESHOLD = 90 # in percent


class LicenseTextExistsCheck(Check):
"""This ensures that the license text file referenced by the tag exists."""
Expand Down Expand Up @@ -85,30 +91,43 @@ def _check_licenses(self, package: Package) -> None:
)
self.missing_license_texts_status[license_tag] = Status.FAILURE
continue

if actual_license != license_tag.get_license_id():
self.license_tags_without_license_text[license_tag] = (
f"License text file '{license_text_file}' is "
+ f"of license {actual_license} but tag is "
+ f"{license_tag.get_license_id()}."
)
# If Tag and File both are in SPDX but don't match -> Error
if is_license_name_in_spdx_list(license_tag.get_license_id()):
self.missing_license_texts_status[license_tag] = Status.FAILURE
else:
self.missing_license_texts_status[license_tag] = Status.WARNING
self.files_with_wrong_tags[license_tag] = {
"actual_license": actual_license,
"license_tag": license_tag.get_license_id(),
}
continue
if license_tag.has_license_text_file():
license_file_for_tag = (
package.abspath + "/" + license_tag.get_license_text_file()
)
with open(license_file_for_tag, "r", encoding="utf-8") as f:
content = f.read()
similarity_of_texts = self.compare_text_with_spdx_text(license_tag, content)

# IDEA: if accepted, add the tag to the package.found_license_texts, since scanning
# has failed to do so. Also solves problem of license_file_referenced check

# if similarity couldn't be determined or is too low --> fail, else success
if similarity_of_texts is None or similarity_of_texts < SIMILARITY_THRESHOLD:
self.license_tags_without_license_text[license_tag] = (
f"License text file '{license_text_file}' is "
+ f"of license {actual_license} but tag is "
+ f"{license_tag.get_license_id()}."
)
# If Tag and File both are in SPDX but don't match -> Error
if is_license_name_in_spdx_list(license_tag.get_license_id()):
self.missing_license_texts_status[license_tag] = Status.FAILURE
else:
self.missing_license_texts_status[license_tag] = Status.WARNING
self.files_with_wrong_tags[license_tag] = {
"actual_license": actual_license,
"license_tag": license_tag.get_license_id(),
}
continue

def _evaluate_results(self):
if len(self.license_tags_without_license_text) > 0:
if max(self.missing_license_texts_status.values()) == Status.WARNING:
self._warning(
"Since they are not in the SPDX list, "
"we can not check if these tags have the correct "
"license text:\n"
"Since they are not in the SPDX list, we can not check if these tags have the"
" correct license text:\n"
+ "\n".join(
[
f" '{x[0]}': {x[1]}"
Expand All @@ -118,18 +137,47 @@ def _evaluate_results(self):
)
else:
self._failed(
"The following license tags do not "
"have a valid license text "
"file:\n"
"The following license tags do not have a valid license text file:\n"
+ "\n".join(
[
f" '{x[0]}': {x[1]}"
for x in self.license_tags_without_license_text.items()
]
)
)
self.verbose_output = red(
self.verbose_output = red( # pylint: disable=attribute-defined-outside-init
"\n".join([f" '{x[0]}': {x[1]}" for x in self.found_license_texts.items()])
)
else:
self._success("All license tags have a valid license text file.")

def compare_text_with_spdx_text(self, tag, found_lic_text):
    """Return the similarity in percent between the SPDX reference text and a given text.

    The reference text is looked up in a per-user cache directory
    (``~/.cache/ros_license_toolkit``). If it is not cached yet, it is downloaded from
    spdx.org and written to the cache so later runs can work without network access.

    Args:
        tag: License tag; its string form is used both in the SPDX URL and in the name of
            the cache file.
        found_lic_text: License text found in the package.

    Returns:
        The similarity percentage computed by ``get_similarity_percent``, or ``None`` if the
        reference text could not be obtained (network failure, non-200 response, or a
        response without a usable ``licenseText`` entry).
    """
    cache_dir: str = os.path.expanduser("~/.cache/ros_license_toolkit")
    os.makedirs(cache_dir, exist_ok=True)
    license_file = os.path.join(cache_dir, f"license_{tag}.txt")

    if os.path.exists(license_file):
        with open(license_file, "r", encoding="utf-8") as f:
            original_text = f.read()
    else:
        url = f"https://spdx.org/licenses/{tag}.json"
        try:
            response = requests.get(url, timeout=100)
            if response.status_code != 200:
                return None
            original_text = response.json()["licenseText"]
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Being offline, receiving malformed JSON or a payload without a license text
            # must not crash the check; None tells the caller that the similarity could
            # not be determined.
            return None
        if not isinstance(original_text, str):
            return None
        # Only cache the text once it is known to be valid, so a failed download never
        # leaves a broken cache entry behind.
        with open(license_file, "w", encoding="utf-8") as f:
            f.write(original_text)
    return self.get_similarity_percent(original_text, found_lic_text)

def get_similarity_percent(self, text1, text2):
    """Return the Levenshtein-distance-based similarity of two texts in percent.

    The edit distance is normalized by the length of the longer text, so identical texts
    yield 100.0 and the value decreases as more edits are needed.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        Similarity in percent, rounded to two decimal places.
    """
    longer_length = max(len(text1), len(text2))
    if longer_length == 0:
        # Two empty texts are identical; returning early avoids a division by zero.
        return 100.0
    distance = jellyfish.levenshtein_distance(text1, text2)
    return round(100.0 * (longer_length - distance) / longer_length, 2)
Loading