Skip to content

Commit ddb993d

Browse files
committed
Merge latest develop
Signed-off-by: Philippe Ombredanne <[email protected]>
2 parents ecece24 + 138abda commit ddb993d

File tree

1,982 files changed

+20073
-2191
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,982 files changed

+20073
-2191
lines changed

etc/scripts/buildrules.py

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -157,14 +157,67 @@ def find_rule_base_loc(license_expression):
157157
idx += 1
158158

159159

160+
def _is_valid_percentage(value):
    """
    Return True if `value` is empty or is a number between 0 and 100 inclusive.

    A missing or falsy value (None, 0, '') is treated as 0. A value that
    cannot be converted to a float (or that is NaN) is reported as invalid
    rather than raising.
    """
    try:
        number = float(value or 0)
    except (TypeError, ValueError):
        return False
    return 0 <= number <= 100


def validate_license_rules(rules_data, licensing):
    """
    Check all rules and return a list of error message strings.

    `rules_data` is an iterable of rule objects, each with a `data` mapping
    of rule metadata. `licensing` is an object with a
    `parse(expression, validate=True, simple=True)` method that raises an
    exception when the expression is invalid.

    All the rules are always checked: a problem with one rule is recorded
    as an error message and never stops the validation of the other rules.
    The returned list is empty when every rule is valid.
    """
    type_flag_names = (
        'is_license_notice',
        'is_license_text',
        'is_license_tag',
        'is_license_reference',
    )

    errors = []
    for rule in rules_data:
        data = rule.data
        is_negative = data.get('is_negative')
        is_false_positive = data.get('is_false_positive')

        if is_negative and is_false_positive:
            msg = 'Invalid rule with mutually exclusive false positive/negative flags: {}'.format(rule)
            errors.append(msg)

        # a negative rule carries no license: nothing else to check
        if is_negative:
            continue

        # both fields are percentages: a non-numeric value is reported as an
        # error instead of aborting the whole validation with an exception
        for field in ('relevance', 'minimum_coverage'):
            if not _is_valid_percentage(data.get(field)):
                msg = 'Invalid rule {}: {}'.format(field, rule)
                errors.append(msg)

        type_flags_count = sum(1 for name in type_flag_names if data.get(name))
        if type_flags_count != 1:
            msg = 'Invalid rule is_license_* flags. Only one allowed. At least one needed: {}'.format(rule)
            errors.append(msg)

        license_expression = data.get('license_expression')
        if not license_expression:
            msg = 'Missing license_expression for rule: {}'.format(rule)
            errors.append(msg)
            # there is nothing to parse: do not report a second, redundant error
            continue

        try:
            licensing.parse(license_expression, validate=True, simple=True)
        except Exception as e:
            # deliberately broad: any parsing or validation failure is
            # collected as a rule error and reported with the others
            msg = 'Invalid license_expression for rule: {}\n{}'.format(rule, str(e))
            errors.append(msg)

    return errors
209+
210+
160211
@click.command()
161212
@click.argument('licenses_file', type=click.Path(), metavar='FILE')
162213
@click.help_option('-h', '--help')
163214
def cli(licenses_file):
164215
"""
165-
Create rules from a structured text file
216+
Create rules from a text file with delimited blocks of metadata and texts.
217+
218+
As an example, a file would contain one or more blocks such as this:
166219
167-
For instance:
220+
\b
168221
----------------------------------------
169222
license_expression: lgpl-2.1
170223
relevance: 100
@@ -182,6 +235,16 @@ def cli(licenses_file):
182235
licenses = cache.get_licenses_db()
183236
licensing = Licensing(licenses.values())
184237

238+
print()
239+
errors = validate_license_rules(rules_data, licensing)
240+
if errors:
241+
print('Invalid rules: exiting....')
242+
for error in errors:
243+
print(error)
244+
print()
245+
246+
raise Exception('Invalid rules: exiting....')
247+
185248
print()
186249
for rule in rules_data:
187250
is_negative = rule.data.get('is_negative')
@@ -190,14 +253,12 @@ def cli(licenses_file):
190253
if existing and not is_negative:
191254
print('Skipping existing non-negative rule:', existing, 'with text:\n', rule.text[:50].strip(), '...')
192255
continue
193-
256+
194257
if is_negative:
195258
base_name = 'not-a-license'
196259
else:
197260
license_expression = rule.data.get('license_expression')
198-
if not license_expression:
199-
raise Exception('Missing license_expression for text:', rule)
200-
licensing.parse(license_expression, validate=True, simple=True)
261+
license_expression = str(licensing.parse(license_expression, validate=True, simple=True))
201262
base_name = license_expression
202263
if is_false_positive:
203264
base_name = 'false-positive_' + base_name

etc/scripts/synclic.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ def build_license(self, mapping, scancode_licenses):
411411
'lgpl-2.0', 'lgpl-2.1', 'lgpl-3.0',
412412
'agpl-1.0', 'agpl-2.0', 'agpl-3.0',
413413
'gfdl-1.1', 'gfdl-1.2', 'gfdl-1.3',
414-
'nokia-qt-exception-1.1', ]):
414+
'nokia-qt-exception-1.1', 'bzip2-1.0.5']):
415415
return
416416

417417
deprecated = mapping.get('isDeprecatedLicenseId', False)
@@ -425,6 +425,8 @@ def build_license(self, mapping, scancode_licenses):
425425
# 'agpl-3.0+'
426426
deprecated = False
427427

428+
# TODO: handle other_spdx_license_keys in license yaml files.
429+
428430
other_urls = mapping.get('seeAlso', [])
429431
other_urls = (o for o in other_urls if o)
430432
other_urls = (o.strip() for o in other_urls)

src/cluecode/copyrights.py

Lines changed: 67 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,9 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
451451
# such as in (1)(ii)(OCT
452452
(r'^.*\(.*\).*\(.*\).*$', 'JUNK'),
453453

454+
# parens such as (1) or (a) are a sign of junk, but of course NOT (c)
455+
(r'^\(([abdefghi\d]|ii|iii)\)$', 'JUNK'),
456+
454457
# found in crypto certificates and LDAP
455458
(r'^O=$', 'JUNK'),
456459
(r'^OU=?$', 'JUNK'),
@@ -502,7 +505,8 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
502505
(r'^[Rr]eleased?$', 'JUNK'),
503506
(r'^[Cc]opyrighting$', 'JUNK'),
504507
(r'^Authori.*$', 'JUNK'),
505-
508+
(r'^such$', 'JUNK'),
509+
(r'^[Aa]ssignments?[.,]?$', 'JUNK'),
506510
(r'^[Bb]uild$', 'JUNK'),
507511
(r'^[Ss]tring$', 'JUNK'),
508512
(r'^Implementation-Vendor$', 'JUNK'),
@@ -618,6 +622,7 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
618622
(r'^Updates$', 'JUNK'),
619623
(r'^Record-keeping$', 'JUNK'),
620624
(r'^Privacy$', 'JUNK'),
625+
(r'^within$', 'JUNK'),
621626

622627
# various trailing words that are junk
623628
(r'^Copyleft$', 'JUNK'),
@@ -666,6 +671,7 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
666671
(r'^However,?$', 'JUNK'),
667672
(r'^[Cc]ollectively$', 'JUNK'),
668673
(r'^following$', 'JUNK'),
674+
(r'^file\.$', 'JUNK'),
669675

670676
# junk when HOLDER(S): typically used in disclaimers instead
671677
(r'^HOLDER\(S\)$', 'JUNK'),
@@ -739,6 +745,8 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
739745
(r'^GA$', 'JUNK'),
740746
(r'^unzip$', 'JUNK'),
741747
(r'^EULA', 'JUNK'),
748+
(r'^Terms?[.,]?$', 'JUNK'),
749+
(r'^Non-Assertion$', 'JUNK'),
742750

743751
# this is not Copr.
744752
(r'^Coproduct,?[,\.]?$$', 'JUNK'),
@@ -747,6 +755,7 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
747755
(r'^CONTRIBUTORS?[,\.]?$', 'JUNK'),
748756
(r'^OTHERS?[,\.]?$', 'JUNK'),
749757
(r'^Contributors?\:[,\.]?$', 'JUNK'),
758+
(r'^Version$', 'JUNK'),
750759

751760
############################################################################
752761
# Nouns and proper Nouns
@@ -846,6 +855,7 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
846855
(r'^GPL\'d', 'NN'),
847856
(r'^Gnome$', 'NN'),
848857
(r'^GnuPG$', 'NN'),
858+
(r'^Government.', 'NNP'),
849859
(r'^Government', 'NN'),
850860
(r'^Grants?\.?,?$', 'NN'),
851861
(r'^Header', 'NN'),
@@ -946,6 +956,7 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
946956
(r'^Section', 'NN'),
947957
(r'^Send$', 'NN'),
948958
(r'^Separa', 'NN'),
959+
(r'^Service$', 'NN'),
949960
(r'^Several$', 'NN'),
950961
(r'^SIGN$', 'NN'),
951962
(r'^Site\.?$', 'NN'),
@@ -983,7 +994,6 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
983994
(r'^VALUE$', 'NN'),
984995
(r'^Various', 'NN'),
985996
(r'^Vendor', 'NN'),
986-
(r'^Version', 'NN'),
987997
(r'^VIEW$', 'NN'),
988998
(r'^Visit', 'NN'),
989999
(r'^Website', 'NN'),
@@ -993,8 +1003,11 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
9931003
(r'^WPA$', 'NN'),
9941004
(r'^Xalan$', 'NN'),
9951005
(r'^YOUR', 'NN'),
1006+
(r'^Your', 'NN'),
9961007
(r'^DateTime', 'NN'),
9971008
(r'^Create$', 'NN'),
1009+
(r'^Engine\.$', 'NN'),
1010+
(r'^While$', 'NN'),
9981011

9991012
# Hours/Date/Day/Month text references
10001013
(r'^am$', 'NN'),
@@ -1091,10 +1104,15 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
10911104
(r'werken$', 'NNP'),
10921105
(r'various\.?$', 'NNP'),
10931106

1107+
# treat Attributable as proper noun as it is seen in Author tags such as in:
1108+
# @author not attributable
1109+
(r'^[Aa]ttributable$', 'NNP'),
1110+
10941111
# rarer caps
10951112
# EPFL-LRC/ICA
10961113
(r'^[A-Z]{3,6}-[A-Z]{3,6}/[A-Z]{3,6}', 'NNP'),
10971114

1115+
10981116
############################################################################
10991117
# Named entities: companies, groups, universities, etc
11001118
############################################################################
@@ -1221,6 +1239,9 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
12211239
(r'^[Cc]ontribut(ors|ing)\.?$', 'CONTRIBUTORS'),
12221240
(r'^contributors,$', 'CONTRIBUTORS'),
12231241

1242+
(r'^Contributor[,.]?$', 'NN'),
1243+
(r'^Licensor[,.]?$', 'NN'),
1244+
12241245
# same for developed, etc...
12251246
(r'^[Cc]oded$', 'AUTH2'),
12261247
(r'^[Rr]ecoded$', 'AUTH2'),
@@ -1416,6 +1437,8 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
14161437
(r'^<([a-zA-Z]+[a-zA-Z\.]){3,}$', 'EMAIL_START'),
14171438
(r'^[a-zA-Z\.]{2,}>$', 'EMAIL_END'),
14181439

1440+
# a .sh shell script is NOT an email.
1441+
(r'^.*\.sh\.?$', 'JUNK'),
14191442
# email eventually in parens or brackets with some trailing punct.
14201443
(r'^[\<\(]?[a-zA-Z0-9]+[a-zA-Z0-9\+_\-\.\%]*(@|at)[a-zA-Z0-9][a-zA-Z0-9\+_\-\.\%]+\.[a-zA-Z]{2,5}?[\>\)\.\,]*$', 'EMAIL'),
14211444

@@ -1704,7 +1727,7 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
17041727
# and other contributors
17051728
NAME: {<CC> <NN>? <CONTRIBUTORS>} #644
17061729
1707-
NAME: {<NNP|CAPS>+ <AUTHS|CONTRIBUTORS>} #660
1730+
NAME: {<NNP|CAPS>+ <AUTHS|AUTHDOT|CONTRIBUTORS>} #660
17081731
17091732
NAME: {<VAN|OF> <NAME>} #680
17101733
NAME: {<NAME-YEAR> <COMP|COMPANY>} #690
@@ -1970,7 +1993,17 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
19701993
COPYRIGHT: { <COPY> <COPY> <YR-RANGE> <CONTRIBUTORS> <OTH> } #2276
19711994
19721995
# copyrighted by Object Computing, Inc., St. Louis Missouri, Copyright (C) 2002, all rights reserved.
1973-
COPYRIGHT: {<COPYRIGHT> <COPY>+ <YR-RANGE> <ALLRIGHTRESERVED>} #2290
1996+
COPYRIGHT: {<COPYRIGHT> <COPY>+ <YR-RANGE> <ALLRIGHTRESERVED>} #2278
1997+
1998+
# copyrighted by Object Computing, Inc., St. Louis Missouri, Copyright (C) 2002, all rights reserved.
1999+
COPYRIGHT: {<COPYRIGHT> <COPY>+ <YR-RANGE> <ALLRIGHTRESERVED>} #2279
2000+
2001+
# Copyright (c) 2004, The Codehaus
2002+
COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <NN> <NNP>} #22790
2003+
2004+
# Copyright (c) 2017 odahcam
2005+
COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <NN> <ALLRIGHTRESERVED>} #22791
2006+
COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <NN>} #22792
19742007
19752008
COPYRIGHT2: {<COPY>+ <NN|CAPS>? <YR-RANGE>+ <PN>*} #2280
19762009
@@ -2036,6 +2069,7 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
20362069
# Copyright 2008 TJ <[email protected]>
20372070
COPYRIGHT: {<COPYRIGHT2> <EMAIL>} #2636
20382071
2072+
# Copyright RUSS DILL Russ <[email protected]>
20392073
COPYRIGHT: {<COPYRIGHT> <CAPS> <NAME-EMAIL>} #2637
20402074
20412075
# maintainer Norbert Tretkowski <[email protected]> 2005-04-16
@@ -2137,7 +2171,7 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
21372171
COPYRIGHT: {<COMPANY> <ALLRIGHTRESERVED> <COPYRIGHT2>} #3030
21382172
21392173
# Copyright (c) 2000 United States Government as represented by the Secretary of the Navy. All rights reserved.
2140-
COPYRIGHT: {<COPYRIGHT> <NN> <NN> <NN> <BY> <NN> <NAME> <ALLRIGHTRESERVED>} #3035
2174+
COPYRIGHT: {<COPYRIGHT> <NN> <NN> <NN|NNP> <BY> <NN> <NAME> <ALLRIGHTRESERVED>} #3035
21412175
21422176
# Copyright (c) 2007-2008, Y Giridhar Appaji Nag <[email protected]>
21432177
COPYRIGHT: {<COPYRIGHT> <COMPANY|NAME|NAME-EMAIL|NAME-YEAR>+} #3040
@@ -2209,6 +2243,9 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
22092243
# Copyright (C) 2005-2006 dann frazier <[email protected]>
22102244
COPYRIGHT: {<COPYRIGHT2> <NN> <NN> <EMAIL>} #999991
22112245
2246+
# Copyright (c) 2008 Intel Corporation / Qualcomm Inc.
2247+
COPYRIGHT: {<COPYRIGHT> <DASH> <COMPANY>} #copydash-co
2248+
22122249
#######################################
22132250
# Authors
22142251
#######################################
@@ -2252,13 +2289,22 @@ def as_str(cls, node, ignores=frozenset(), include_allrights=False):
22522289
# developed by the XML DB Initiative http//www.xmldb.org
22532290
AUTHOR: {<AUTH2> <COMPANY>} #2645-7
22542291
2292+
# Author not attributable
2293+
AUTHOR: {<AUTH> <NN> <NNP>} #not attributable
2294+
2295+
# author (Panagiotis Tsirigotis)
2296+
AUTHOR: {<AUTH> <NNP><NNP>+} #author Foo Bar
2297+
2298+
22552299
#######################################
2256-
# Mixed AUTHORS and COPYRIGHTS
2300+
# Mixed AUTHOR and COPYRIGHT
22572301
#######################################
22582302
22592303
# Compounded statements usings authors
2260-
# found in some rare cases with a long list of authors.
2261-
COPYRIGHT: {<COPY> <BY> <AUTHOR>+ <YR-RANGE>*} #2800
2304+
2305+
# Copyright by Daniel K. Gebhart
2306+
# Also found in some rare cases with a long list of authors.
2307+
COPYRIGHT: {<COPY> <BY>? <AUTHOR>+ <YR-RANGE>*} #2800-1
22622308
22632309
COPYRIGHT: {<AUTHOR> <COPYRIGHT2>} #2820
22642310
COPYRIGHT: {<AUTHOR> <YR-RANGE>} #2830
@@ -2312,6 +2358,7 @@ def refine_copyright(c):
23122358
c = strip_balanced_edge_parens(c)
23132359
c = strip_suffixes(c, suffixes=COPYRIGHTS_SUFFIXES)
23142360
c = strip_trailing_period(c)
2361+
c = c.strip("'")
23152362
return c.strip()
23162363

23172364

@@ -2334,8 +2381,6 @@ def refine_holder(h):
23342381
h = h.strip()
23352382
h = strip_trailing_period(h)
23362383
h = h.strip()
2337-
h = strip_balanced_edge_parens(h)
2338-
h = h.strip()
23392384
if h and h.lower() not in HOLDERS_JUNK:
23402385
return h
23412386

@@ -2351,6 +2396,10 @@ def refine_author(a):
23512396
a = a.strip()
23522397
a = strip_trailing_period(a)
23532398
a = a.strip()
2399+
a = strip_balanced_edge_parens(a)
2400+
a = a.strip()
2401+
a = refine_names(a, prefixes=AUTHORS_PREFIXES)
2402+
a = a.strip()
23542403
if a and a.lower() not in AUTHORS_JUNK:
23552404
return a
23562405

@@ -2365,6 +2414,8 @@ def refine_names(s, prefixes):
23652414
s = strip_all_unbalanced_parens(s)
23662415
s = strip_some_punct(s)
23672416
s = s.strip()
2417+
s = strip_balanced_edge_parens(s)
2418+
s = s.strip()
23682419
s = strip_prefixes(s, prefixes)
23692420
s = s.strip()
23702421
return s
@@ -2543,6 +2594,7 @@ def refine_names(s, prefixes):
25432594
'author\'',
25442595
'authors,',
25452596
'authorship',
2597+
'or',
25462598
])
25472599
))
25482600

@@ -2561,6 +2613,11 @@ def refine_names(s, prefixes):
25612613
'company',
25622614
'contributing project',
25632615
'its author',
2616+
'gnomovision',
2617+
'would',
2618+
'may',
2619+
'attributions',
2620+
'the',
25642621
])
25652622

25662623
################################################################################
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
key: 996-icu-1.0
22
short_name: Anti 996 License 1.0
33
name: Anti 996 License Version 1.0 (Draft)
4-
category: Proprietary Free
4+
category: Free Restricted
55
owner: 996icu
66
homepage_url: https://github.com/996icu/996.ICU
7+
notes: this is based on the still draft text of 2019-04-17
78
text_urls:
89
- https://github.com/996icu/996.ICU/blob/dd185162b9d56b629e52c5726995cd7505326b06/LICENSE
9-
notes: this is based on the still draft text of 2019-04-17

0 commit comments

Comments
 (0)