1919import os
2020from typing import Any , Dict , Optional
2121
22+ import jellyfish
23+ import requests # type: ignore[import-untyped]
24+
2225from ros_license_toolkit .checks import Check , Status
2326from ros_license_toolkit .common import get_spdx_license_name
2427from ros_license_toolkit .license_tag import LicenseTag , is_license_name_in_spdx_list
2528from ros_license_toolkit .package import Package
2629from ros_license_toolkit .ui_elements import red
2730
31+ # Minimal similarity between a license text and its SPDX reference text for it to be accepted
32+ SIMILARITY_THRESHOLD = 90 # in percent
33+
2834
2935class LicenseTextExistsCheck (Check ):
3036 """This ensures that the license text file referenced by the tag exists."""
@@ -85,30 +91,43 @@ def _check_licenses(self, package: Package) -> None:
8591 )
8692 self .missing_license_texts_status [license_tag ] = Status .FAILURE
8793 continue
94+
8895 if actual_license != license_tag .get_license_id ():
89- self .license_tags_without_license_text [license_tag ] = (
90- f"License text file '{ license_text_file } ' is "
91- + f"of license { actual_license } but tag is "
92- + f"{ license_tag .get_license_id ()} ."
93- )
94- # If Tag and File both are in SPDX but don't match -> Error
95- if is_license_name_in_spdx_list (license_tag .get_license_id ()):
96- self .missing_license_texts_status [license_tag ] = Status .FAILURE
97- else :
98- self .missing_license_texts_status [license_tag ] = Status .WARNING
99- self .files_with_wrong_tags [license_tag ] = {
100- "actual_license" : actual_license ,
101- "license_tag" : license_tag .get_license_id (),
102- }
103- continue
96+ if license_tag .has_license_text_file ():
97+ license_file_for_tag = (
98+ package .abspath + "/" + license_tag .get_license_text_file ()
99+ )
100+ with open (license_file_for_tag , "r" , encoding = "utf-8" ) as f :
101+ content = f .read ()
102+ similarity_of_texts = self .compare_text_with_spdx_text (license_tag , content )
103+
104+ # IDEA: if accepted, add the tag to the package.found_license_texts, since scanning
105+ # has failed to do so. Also solves problem of license_file_referenced check
106+
107+ # if similarity couldn't be determined or is too low --> fail, else success
108+ if similarity_of_texts is None or similarity_of_texts < SIMILARITY_THRESHOLD :
109+ self .license_tags_without_license_text [license_tag ] = (
110+ f"License text file '{ license_text_file } ' is "
111+ + f"of license { actual_license } but tag is "
112+ + f"{ license_tag .get_license_id ()} ."
113+ )
114+ # If Tag and File both are in SPDX but don't match -> Error
115+ if is_license_name_in_spdx_list (license_tag .get_license_id ()):
116+ self .missing_license_texts_status [license_tag ] = Status .FAILURE
117+ else :
118+ self .missing_license_texts_status [license_tag ] = Status .WARNING
119+ self .files_with_wrong_tags [license_tag ] = {
120+ "actual_license" : actual_license ,
121+ "license_tag" : license_tag .get_license_id (),
122+ }
123+ continue
104124
105125 def _evaluate_results (self ):
106126 if len (self .license_tags_without_license_text ) > 0 :
107127 if max (self .missing_license_texts_status .values ()) == Status .WARNING :
108128 self ._warning (
109- "Since they are not in the SPDX list, "
110- "we can not check if these tags have the correct "
111- "license text:\n "
129+ "Since they are not in the SPDX list, we can not check if these tags have the"
130+ " correct license text:\n "
112131 + "\n " .join (
113132 [
114133 f" '{ x [0 ]} ': { x [1 ]} "
@@ -118,18 +137,47 @@ def _evaluate_results(self):
118137 )
119138 else :
120139 self ._failed (
121- "The following license tags do not "
122- "have a valid license text "
123- "file:\n "
140+ "The following license tags do not have a valid license text file:\n "
124141 + "\n " .join (
125142 [
126143 f" '{ x [0 ]} ': { x [1 ]} "
127144 for x in self .license_tags_without_license_text .items ()
128145 ]
129146 )
130147 )
131- self .verbose_output = red (
148+ self .verbose_output = red ( # pylint: disable=attribute-defined-outside-init
132149 "\n " .join ([f" '{ x [0 ]} ': { x [1 ]} " for x in self .found_license_texts .items ()])
133150 )
134151 else :
135152 self ._success ("All license tags have a valid license text file." )
153+
def compare_text_with_spdx_text(self, tag, found_lic_text):
    """Return the similarity in percent between the SPDX text of a license and a given text.

    The reference text is read from a per-license cache file below
    ``~/.cache/ros_license_toolkit``. If no cache file exists yet, the text is
    downloaded from ``https://spdx.org/licenses/<tag>.json`` and written to the
    cache for later runs.

    Args:
        tag: License identifier; its string form is used both in the cache
            file name and in the SPDX URL.
        found_lic_text: License text that was found in the package.

    Returns:
        The similarity percentage computed by ``get_similarity_percent``, or
        ``None`` if the reference text could not be obtained (network error,
        non-200 response, or a response without a usable ``licenseText``).
    """
    cache_dir: str = os.path.expanduser("~/.cache/ros_license_toolkit")
    os.makedirs(cache_dir, exist_ok=True)
    license_file = os.path.join(cache_dir, f"license_{tag}.txt")

    if os.path.exists(license_file):
        with open(license_file, "r", encoding="utf-8") as f:
            original_text = f.read()
    else:
        url = f"https://spdx.org/licenses/{tag}.json"
        try:
            response = requests.get(url, timeout=100)
            if response.status_code != 200:
                return None
            original_text = response.json()["licenseText"]
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Being offline or receiving a malformed answer must not crash the
            # check; callers treat None as "similarity could not be determined".
            return None
        if not isinstance(original_text, str):
            return None
        # Only cache a text that was fetched and parsed successfully.
        with open(license_file, "w", encoding="utf-8") as f:
            f.write(original_text)

    return self.get_similarity_percent(original_text, found_lic_text)
176+
def get_similarity_percent(self, text1, text2):
    """Return the Levenshtein-distance-based similarity of two texts in percent.

    The edit distance is normalized by the length of the longer text and the
    result is rounded to two decimals.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        Similarity as a float percentage; ``100.0`` for identical texts,
        including two empty texts.
    """
    # Identical texts need no distance computation. This also covers two empty
    # texts, for which the normalization below would divide by zero.
    if text1 == text2:
        return 100.0
    longer = float(max(len(text1), len(text2)))
    distance = float(jellyfish.levenshtein_distance(text1, text2))
    return round(100 * (longer - distance) / longer, 2)
0 commit comments