Skip to content

Commit 3561edf

Browse files
Remove copyright of GPL family license text
Signed-off-by: Wonjae Park <[email protected]>
1 parent 4b5777c commit 3561edf

File tree

2 files changed

+126
-12
lines changed

2 files changed

+126
-12
lines changed

src/fosslight_source/_parsing_scancode_file_item.py

Lines changed: 53 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,35 @@
3030
SPDX_REPLACE_WORDS = ["(", ")"]
3131
KEY_AND = r"(?<=\s)and(?=\s)"
3232
KEY_OR = r"(?<=\s)or(?=\s)"
33+
# Regular expressions that mark a license name as belonging to the GPL family.
# is_gpl_family_license() lower-cases the name and applies re.search with each
# entry, so every pattern is written in lower case and matches anywhere in
# the name.
GPL_LICENSE_PATTERNS = [
    # Short identifiers: gpl, gpl-2.0, gpl-3.0
    r"gpl",
    # Short identifiers: lgpl, lgpl-2.1, lgpl-3.0
    r"lgpl",
    # Short identifiers: agpl, agpl-3.0
    r"agpl",
    # Spelled-out name: GNU General Public License
    r"gnu.*general.*public.*license",
    # Spelled-out name: GNU Lesser General Public License
    r"gnu.*lesser.*general.*public.*license",
    # Spelled-out name: GNU Affero General Public License
    r"gnu.*affero.*general.*public.*license",
]
41+
42+
43+
def is_gpl_family_license(licenses: list) -> bool:
    """Tell whether any entry of ``licenses`` names a GPL family license.

    Each non-empty entry is lower-cased and searched with every regular
    expression in ``GPL_LICENSE_PATTERNS``. The first entry that matches is
    logged at debug level and ends the scan.

    :param licenses: license names to inspect; may be empty or ``None``.
    :return: ``True`` as soon as one entry matches, otherwise ``False``.
    """
    # A falsy ``licenses`` yields nothing to scan; falsy entries (None, "")
    # are dropped before matching.
    for candidate in filter(None, licenses or []):
        normalized = candidate.lower()
        if any(re.search(regex, normalized) for regex in GPL_LICENSE_PATTERNS):
            logger.debug(f"GPL family license detected: {candidate}")
            return True

    return False
58+
59+
60+
def should_remove_copyright_for_gpl_license_text(licenses: list, is_license_text: bool) -> bool:
    """Decide whether copyright entries should be dropped for a file.

    Removal applies only when the file is a license text AND at least one of
    its detected licenses is in the GPL family.

    :param licenses: license names detected for the file.
    :param is_license_text: whether the file was classified as license text.
    :return: truthy only when both conditions hold.
    """
    # Short-circuit: a non-license-text file never needs the license check.
    if not is_license_text:
        return is_license_text
    return is_gpl_family_license(licenses)
3362

3463

3564
def get_error_from_header(header_item: list) -> Tuple[bool, str]:
@@ -100,8 +129,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
100129
pass
101130
copyright_value_list.append(copyright_data)
102131

103-
result_item.copyright = copyright_value_list
104-
105132
# Set the license value
106133
license_detected = []
107134
if licenses is None or licenses == "":
@@ -165,14 +192,21 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
165192
if len(license_detected) > 0:
166193
result_item.licenses = license_detected
167194

195+
if is_manifest_file(file_path):
196+
result_item.is_license_text = True
197+
198+
# Remove copyright info for license text file of GPL family
199+
if should_remove_copyright_for_gpl_license_text(license_detected, result_item.is_license_text):
200+
logger.debug(f"Removing copyright for GPL family license text file: {file_path}")
201+
result_item.copyright = []
202+
else:
203+
result_item.copyright = copyright_value_list
204+
168205
if len(license_expression_list) > 0:
169206
license_expression_list = list(
170207
set(license_expression_list))
171208
result_item.comment = ','.join(license_expression_list)
172209

173-
if is_manifest_file(file_path):
174-
result_item.is_license_text = True
175-
176210
if is_exclude_file(file_path, prev_dir, prev_dir_value):
177211
result_item.exclude = True
178212
scancode_file_item.append(result_item)
@@ -227,7 +261,6 @@ def parsing_scancode_32_later(
227261
except Exception:
228262
pass
229263
copyright_value_list.append(copyright_data)
230-
result_item.copyright = copyright_value_list
231264

232265
license_detected = []
233266
licenses = file.get("license_detections", [])
@@ -263,6 +296,20 @@ def parsing_scancode_32_later(
263296
license_list[lic_matched_key] = lic_info
264297
license_detected.append(found_lic)
265298
result_item.licenses = license_detected
299+
300+
result_item.exclude = is_exclude_file(file_path)
301+
result_item.is_license_text = file.get("percentage_of_license_text", 0) > 90 or is_notice_file(file_path)
302+
303+
if is_manifest_file(file_path) and len(license_detected) > 0:
304+
result_item.is_license_text = True
305+
306+
# Remove copyright info for license text file of GPL family
307+
if should_remove_copyright_for_gpl_license_text(license_detected, result_item.is_license_text):
308+
logger.debug(f"Removing copyright for GPL family license text file: {file_path}")
309+
result_item.copyright = []
310+
else:
311+
result_item.copyright = copyright_value_list
312+
266313
if len(license_detected) > 1:
267314
license_expression_spdx = file.get("detected_license_expression_spdx", "")
268315
license_expression = file.get("detected_license_expression", "")
@@ -271,12 +318,6 @@ def parsing_scancode_32_later(
271318
if license_expression:
272319
result_item.comment = license_expression
273320

274-
result_item.exclude = is_exclude_file(file_path)
275-
result_item.is_license_text = file.get("percentage_of_license_text", 0) > 90 or is_notice_file(file_path)
276-
277-
if is_manifest_file(file_path) and len(license_detected) > 0:
278-
result_item.is_license_text = True
279-
280321
scancode_file_item.append(result_item)
281322
except Exception as ex:
282323
msg.append(f"Error Parsing item: {ex}")

tests/test_tox.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,16 @@
66
import subprocess
77
import pytest
88
import shutil
9+
import sys
10+
11+
# Add project root to sys.path for importing FL Source modules
12+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
13+
14+
# Import after sys.path modification to access our custom GPL license functions
15+
# flake8: noqa E402
16+
from fosslight_source._parsing_scancode_file_item import (
17+
is_gpl_family_license, should_remove_copyright_for_gpl_license_text
18+
)
919

1020
remove_directories = ["test_scan", "test_scan2", "test_scan3"]
1121

@@ -26,6 +36,69 @@ def run_command(command):
2636
return success, process.stdout if success else process.stderr
2737

2838

39+
def test_is_gpl_family_license():
    """Check is_gpl_family_license() on GPL family and non-GPL license lists."""
    # Lists that contain at least one GPL/LGPL/AGPL name, in mixed case and
    # mixed with unrelated licenses.
    expected_gpl = (
        ["gpl-2.0"],
        ["gpl-3.0"],
        ["lgpl-2.1"],
        ["lgpl-3.0"],
        ["agpl-3.0"],
        ["GPL-2.0"],
        ["LGPL-2.1"],
        ["AGPL-3.0"],
        ["gnu-general-public-license"],
        ["gnu-lesser-general-public-license"],
        ["gnu-affero-general-public-license"],
        ["gpl-2.0", "mit"],
        ["mit", "lgpl-3.0"],
    )

    # Lists without any GPL family name, including the empty list.
    expected_other = (
        ["mit"],
        ["apache-2.0"],
        ["bsd-3-clause"],
        ["mozilla-2.0"],
        ["isc"],
        [],
        ["mit", "apache-2.0"],
    )

    for licenses in expected_gpl:
        detected = is_gpl_family_license(licenses)
        assert detected, f"Should detect GPL family license: {licenses}"

    for licenses in expected_other:
        detected = is_gpl_family_license(licenses)
        assert not detected, f"Should not detect GPL family license: {licenses}"
74+
75+
def test_should_remove_copyright_for_gpl_license_text():
    """Check the copyright-removal decision for license/file-kind combinations."""
    # Each row: (licenses, is_license_text, expected decision, failure message)
    cases = [
        # GPL family license text files: copyright is removed.
        (["gpl-2.0"], True, True,
         "Should remove copyright for GPL license text file"),
        (["lgpl-3.0"], True, True,
         "Should remove copyright for LGPL license text file"),
        (["agpl-3.0"], True, True,
         "Should remove copyright for AGPL license text file"),
        # GPL family licenses on ordinary source files: copyright is kept.
        (["gpl-2.0"], False, False,
         "Should NOT remove copyright for GPL source file"),
        (["lgpl-3.0"], False, False,
         "Should NOT remove copyright for LGPL source file"),
        # Non-GPL license text files: copyright is kept.
        (["mit"], True, False,
         "Should NOT remove copyright for MIT license text file"),
        (["apache-2.0"], True, False,
         "Should NOT remove copyright for Apache license text file"),
        # Non-GPL source file.
        (["mit"], False, False,
         "Should NOT remove copyright for MIT source file"),
        # No licenses detected at all.
        ([], True, False,
         "Should NOT remove copyright for empty license list"),
        ([], False, False,
         "Should NOT remove copyright for empty license list"),
    ]

    for licenses, is_license_text, should_remove, message in cases:
        decision = should_remove_copyright_for_gpl_license_text(licenses, is_license_text)
        assert bool(decision) is should_remove, message
100+
101+
29102
def test_run():
30103
scan_success, _ = run_command("fosslight_source -p tests/test_files -j -m -o test_scan")
31104
scan_exclude_success, _ = run_command("fosslight_source -p tests -e test_files/test cli_test.py -j -m -o test_scan2")

0 commit comments

Comments
 (0)