Skip to content

Commit ef4232c

Browse files
authored
Merge pull request #2626 from nexB/omnibus-license-updates-july-21
Omnibus license updates July/Aug 21
2 parents 6296000 + 5ca96d1 commit ef4232c

File tree

1,332 files changed

+147306
-2481
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,332 files changed

+147306
-2481
lines changed

etc/scripts/licenses/synclic.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from commoncode import fileutils
2727

2828
import licensedcode
29+
from licensedcode import models
2930
from licensedcode.models import load_licenses
3031
from licensedcode.models import License
3132

@@ -76,6 +77,7 @@ def _clean(licenses):
7677
lic.notes = clean_text(lic.notes)
7778

7879
if updated:
80+
models.update_ignorables(lic, verbose=False)
7981
lic.dump()
8082

8183
for lics in [self.by_key, self.non_english_by_key]:
@@ -174,6 +176,7 @@ def get_licenses(self, scancode_licenses=None, **kwargs):
174176
try:
175177
with io.open(lic.text_file, 'w', encoding='utf-8')as tf:
176178
tf.write(text)
179+
models.update_ignorables(lic, verbose=False)
177180
lic.dump()
178181
licenses.append(lic)
179182
except:
@@ -546,7 +549,7 @@ def build_license(self, mapping, scancode_licenses):
546549
# instead each part of the combo
547550
dejacode_special_composites = set([
548551
'intel-bsd-special',
549-
#'newlib-subdirectory',
552+
# 'newlib-subdirectory',
550553
])
551554
is_component_license = mapping.get('is_component_license') or False
552555

@@ -816,8 +819,8 @@ def license_to_dict(lico):
816819

817820

818821
def merge_licenses(
819-
scancode_license,
820-
external_license,
822+
scancode_license,
823+
external_license,
821824
updatable_attributes,
822825
from_spdx=False,
823826
):
@@ -948,7 +951,7 @@ def update_external(_attrib, _sc_val, _ext_val):
948951
# on difference, the other license wins
949952
if scancode_value != external_value:
950953
# unless we have SPDX ids
951-
if attrib== 'spdx_license_key' and external_value.startswith('LicenseRef-scancode'):
954+
if attrib == 'spdx_license_key' and external_value.startswith('LicenseRef-scancode'):
952955
update_external(attrib, scancode_value, external_value)
953956
else:
954957
update_scancode(attrib, scancode_value, external_value)
@@ -1138,10 +1141,14 @@ def synchronize_licenses(scancode_licenses, external_source, use_spdx_key=False,
11381141

11391142
# finally write changes in place for updated and newly added licenses
11401143
for k in updated_in_scancode | added_to_scancode:
1141-
scancodes_by_key[k].dump()
1144+
lic = scancodes_by_key[k]
1145+
models.update_ignorables(lic, verbose=False)
1146+
lic.dump()
11421147

11431148
for k in updated_in_external | added_to_external:
1144-
externals_by_key[k].dump()
1149+
lic = externals_by_key[k]
1150+
# models.update_ignorables(lic, verbose=False)
1151+
lic.dump()
11451152

11461153
# TODO: at last: print report of incorrect OTHER licenses to submit
11471154
# updates eg. make API calls to DejaCode to create or update

src/cluecode/copyrights.py

Lines changed: 52 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,9 @@ def from_node(
11651165
(r'^Create$', 'NN'),
11661166
(r'^Engine\.$', 'NN'),
11671167
(r'^While$', 'NN'),
1168+
(r'^Review', 'NN'),
1169+
(r'^Help', 'NN'),
1170+
(r'^Web', 'NN'),
11681171

11691172
# alone this is not enough for an NNP
11701173
(r'^Free$', 'NN'),
@@ -1302,34 +1305,34 @@ def from_node(
13021305
(r'^LIMITED[,\.]??$', 'COMP'),
13031306

13041307
# Caps company suffixes
1305-
(r'^INC\.?,?\)?$', 'COMP'),
1306-
(r'^INCORPORATED\.?,?\)?$', 'COMP'),
1307-
(r'^CORP\.?,?\)?$', 'COMP'),
1308-
(r'^CORPORATION\.?,?\)?$', 'COMP'),
1309-
(r'^FOUNDATION\.?,?$', 'COMP'),
1310-
(r'^GROUP\.?,?$', 'COMP'),
1311-
(r'^COMPANY\.?,?$', 'COMP'),
1312-
(r'^\(tm\).?$', 'COMP'),
1313-
(r'^[Ff]orum\.?,?', 'COMP'),
1308+
(r'^INC[\.,\)]*$', 'COMP'),
1309+
(r'^INCORPORATED[\.,\)]*$', 'COMP'),
1310+
(r'^CORP[\.,\)]*$', 'COMP'),
1311+
(r'^CORPORATION[\.,\)]*$', 'COMP'),
1312+
(r'^FOUNDATION[\.,\)]*$', 'COMP'),
1313+
(r'^GROUP[\.,\)]*$', 'COMP'),
1314+
(r'^COMPANY[\.,\)]*$', 'COMP'),
1315+
(r'^\(tm\)[\.,]?$', 'COMP'),
1316+
(r'^[Ff]orum[\.,\)]*', 'COMP'),
13141317

13151318
# company suffix
1316-
(r'^[Cc]orp\.?,?\)?$', 'COMP'),
1317-
(r'^[Cc]orp(oration|\.,?)?\)?$', 'COMP'),
1318-
(r'^[Cc][oO]\.,?$', 'COMP'),
1319-
(r'^[Cc]orporations?\.?,?$', 'COMP'),
1320-
(r'^[Ff]oundation\.?,?$', 'COMP'),
1321-
(r'^[Aa]lliance\.?,?$', 'COMP'),
1319+
(r'^[Cc]orp[\.,\)]*$', 'COMP'),
1320+
(r'^[Cc]orporation[\.,\)]*$', 'COMP'),
1321+
(r'^[Cc][oO][\.,\)]*$', 'COMP'),
1322+
(r'^[Cc]orporations?[\.,\)]*$', 'COMP'),
1323+
(r'^[Cc]onsortium[\.,\)]*$', 'COMP'),
1324+
1325+
(r'^[Ff]oundation[\.,\)]*$', 'COMP'),
1326+
(r'^[Aa]lliance[\.,\)]*$', 'COMP'),
13221327
(r'^Working$', 'COMP'),
1323-
(r'^[Gg]roup\.?,?$', 'COMP'),
1324-
(r'^[Tt]echnology\.?,?$', 'COMP'),
1325-
(r'^[Tt]echnologies\.?,?$', 'COMP'),
1326-
(r'^[Cc]ommunity\.?,?$', 'COMP'),
1327-
(r'^[Cc]ommunities\.?,?$', 'COMP'),
1328-
(r'^[Mm]icrosystems\.?,?$', 'COMP'),
1329-
(r'^[Pp]rojects?\.?,?$', 'COMP'),
1330-
(r'^[Tt]eams?\.?$', 'COMP'),
1331-
(r'^[Tt]ech\.?,?$', 'COMP'),
1332-
(r"^Limited'?\.?,?$", 'COMP'),
1328+
(r'^[Gg]roup[\.,\)]*$', 'COMP'),
1329+
(r'^[Tt]echnolog(y|ies)[\.,\)]*$', 'COMP'),
1330+
(r'^[Cc]ommunit(y|ies)[\.,\)]*$', 'COMP'),
1331+
(r'^[Mm]icrosystems[\.,\)]*$', 'COMP'),
1332+
(r'^[Pp]rojects?[\.,\)]*,?$', 'COMP'),
1333+
(r'^[Tt]eams?[\.,\)]*$', 'COMP'),
1334+
(r'^[Tt]ech[\.,\)]*$', 'COMP'),
1335+
(r"^Limited'?[\.,\)]*$", 'COMP'),
13331336

13341337
# company suffix : LLC, LTD, LLP followed by one extra char
13351338
(r'^[Ll][Tt][Dd]\.?,?$', 'COMP'),
@@ -1341,7 +1344,8 @@ def from_node(
13411344

13421345
# company suffix : SA, SAS, AG, AB, AS, CO, labs followed by a dot
13431346
(r'^(S\.?A\.?S?|Sas|sas|A\/S|AG,?|AB|Labs?|[Cc][Oo]|Research|Center|INRIA|Societe)\.?$', 'COMP'),
1344-
1347+
# French SARL
1348+
(r'^(SARL|S\.A\.R\.L\.)[\.,\)]*$', 'COMP'),
13451349
# company suffix : AS: this is frequent beyond Norway.
13461350
(r'^AS.$', 'COMP'),
13471351
(r'^AS', 'CAPS'),
@@ -1361,11 +1365,13 @@ def from_node(
13611365
# (dutch and belgian) company suffix
13621366
(r'^[Bb]\.?[Vv]\.?|BVBA$', 'COMP'),
13631367
# university
1364-
(r'^\(?[Uu]niv(?:[.]|ersit(?:y|e|at?|ad?))\)?\.?$', 'UNI'),
1368+
(r'^\(?[Uu]niv(?:[.]|ersit(?:y|e|at?|ad?))[\.,\)]*$', 'UNI'),
13651369
(r'^UNIVERSITY$', 'UNI'),
13661370
(r'^College$', 'UNI'),
13671371
# Academia/ie
13681372
(r'^[Ac]cademi[ae]s?$', 'UNI'),
1373+
# Academy
1374+
(r'^[Ac]cademy[\.,\)]*$', 'UNI'),
13691375

13701376
# institutes
13711377
(r'INSTITUTE', 'COMP'),
@@ -1612,11 +1618,11 @@ def from_node(
16121618
# URLS such as <(http://fedorahosted.org/lohit)> or ()
16131619
(r'[<\(]https?:.*[>\)]', 'URL'),
16141620
# URLS such as ibm.com without a scheme
1615-
(r'\s?[a-z0-9A-Z\-\.\_]+\.([Cc][Oo][Mm]|[Nn][Ee][Tt]|[Oo][Rr][Gg]|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|de|be|nl|au|biz)\s?\.?$', 'URL2'),
1621+
(r'\s?[a-z0-9A-Z\-\.\_]+\.([Cc][Oo][Mm]|[Nn][Ee][Tt]|[Oo][Rr][Gg]|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|de|be|se|nl|au|biz)\s?\.?$', 'URL2'),
16161622
# TODO: add more extensions: there are so many TLDs these days!
16171623
# URL wrapped in () or <>
1618-
(r'[\(<]+\s?[a-z0-9A-Z\-\.\_]+\.(com|net|org|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|jp|de|be|nl|au|biz)\s?[\.\)>]+$', 'URL'),
1619-
(r'<?a?.(href)?.\(?[a-z0-9A-Z\-\.\_]+\.(com|net|org|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|jp|de|be|nl|au|biz)[\.\)>]?$', 'URL'),
1624+
(r'[\(<]+\s?[a-z0-9A-Z\-\.\_]+\.(com|net|org|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|jp|de|be|se|nl|au|biz)\s?[\.\)>]+$', 'URL'),
1625+
(r'<?a?.(href)?.\(?[a-z0-9A-Z\-\.\_]+\.(com|net|org|us|mil|io|edu|co\.[a-z][a-z]|eu|ch|fr|jp|de|be|se|nl|au|biz)[\.\)>]?$', 'URL'),
16201626
# derived from regex in cluecode.finder
16211627
(r'<?a?.(href)?.('
16221628
r'(?:http|ftp|sftp)s?://[^\s<>\[\]"]+'
@@ -1762,7 +1768,7 @@ def from_node(
17621768
# Commonwealth Scientific and Industrial Research Organisation (CSIRO)
17631769
COMPANY: {<NNP> <NNP> <CC> <NNP> <COMP> <NNP> <CAPS>}
17641770
1765-
COMPANY: {<NNP> <CC> <NNP> <COMP> <NNP>?} #200
1771+
COMPANY: {<NNP> <CC> <NNP> <COMP> <NNP>*} #200
17661772
17671773
# Android Open Source Project, 3Dfx Interactive, Inc.
17681774
COMPANY: {<NN>? <NN> <NNP> <COMP>} #205
@@ -1773,8 +1779,14 @@ def from_node(
17731779
# NNP NN NNP NNP COMP COMP')
17741780
COMPANY: {<NNP> <NN> <NNP> <NNP> <COMP>+} #207
17751781
1776-
# was COMPANY {<NNP|CAPS> <NNP|CAPS>? <NNP|CAPS>? <NNP|CAPS>? <NNP|CAPS>? <NNP|CAPS>? <COMP> <COMP>?} #210
1777-
COMPANY: {<NNP|CAPS>+ <COMP>+} #210
1782+
# Massachusetts Institute of Technology
1783+
COMPANY: {<NNP> <COMP|COMPANY> <OF> <NNP>+} #208
1784+
1785+
COMPANY: {<NNP|CAPS>+ <COMP|COMPANY>+} #210
1786+
1787+
# University of Southern California, Information Sciences Institute (ISI)
1788+
COMPANY: {<UNI> <OF> <COMPANY> <CAPS>?} #211
1789+
17781790
COMPANY: {<UNI|NNP> <VAN|OF> <NNP>+ <UNI>?} #220
17791791
COMPANY: {<NNP>+ <UNI>} #230
17801792
COMPANY: {<UNI> <OF> <NN|NNP>} #240
@@ -1783,6 +1795,9 @@ def from_node(
17831795
# University of Southern California, Information Sciences Institute (ISI)
17841796
COMPANY: {<COMPANY> <COMPANY> <CAPS>} #251
17851797
1798+
# University of Technology
1799+
COMPANY: {<UNI> <OF> <COMP|COMPANY>} #252
1800+
17861801
# GNOME i18n Project for Vietnamese
17871802
COMPANY: {<CAPS> <NN> <COMP> <NN> <NNP>} #253
17881803
@@ -1937,6 +1952,9 @@ def from_node(
19371952
19381953
COMPANY: {<COMPANY> <COMP|COMPANY>} #840
19391954
1955+
# the Software and Component Technologies group of Trimble Navigation, Ltd.
1956+
COMPANY: {<COMPANY> <OF> <COMP|COMPANY>} #840.1
1957+
19401958
# University Corporation for Advanced Internet Development, Inc.
19411959
COMPANY: {<UNI> <COMPANY>} #845
19421960
@@ -2783,6 +2801,7 @@ def refine_names(s, prefixes):
27832801
'copyright 2003 m. y.',
27842802
'copyright 2001 m. y. name',
27852803
'copyright 2001 m. y.',
2804+
'copyright help center',
27862805
])
27872806

27882807
################################################################################

src/licensedcode/data/licenses/bsd-4-clause-shortened.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,6 @@ owner: Regents of the University of California
66
spdx_license_key: BSD-4-Clause-Shortened
77
other_urls:
88
- https://metadata.ftp-master.debian.org/changelogs//main/a/arpwatch/arpwatch_2.1a15-7_copyright
9+
ignorable_authors:
10+
- the University of California, Lawrence Berkeley Laboratory and its contributors
11+

src/licensedcode/data/licenses/cooperative-non-violent-4.0.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,5 @@ category: Proprietary Free
55
owner: Thufie
66
homepage_url: https://thufie.lain.haus/NPL.html
77
spdx_license_key: LicenseRef-scancode-cooperative-non-violent-4.0
8-
ignorable_authors:
9-
- the Web Service
108
ignorable_urls:
119
- https://thufie.lain.haus/NPL.html

0 commit comments

Comments
 (0)