Skip to content

Commit 00a9abc

Browse files
committed
Improve copyright detection of "distributed"
Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent 2354701 commit 00a9abc

24 files changed

+172
-10
lines changed

src/cluecode/copyrights.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def detect(self,
280280
'EMAIL', 'URL',
281281
'HOLDER', 'AUTHOR',
282282
'IS', 'HELD',
283-
283+
284284
])
285285

286286
non_holder_labels_mini = frozenset([
@@ -707,7 +707,7 @@ def build_detection_from_node(
707707
(r'^[Rr]éservés[\.,]*$', 'RESERVED'),
708708
(r'^[Rr]eserves[\.,]*$', 'RESERVED'),
709709

710-
# used to detect "copyright is held by..."
710+
# used to detect "copyright is held by..."
711711
(r'^is$', 'IS'),
712712
(r'^are$', 'IS'),
713713
(r'^held$', 'HELD'),
@@ -747,7 +747,7 @@ def build_detection_from_node(
747747

748748
# all lower case with dashes "enforce-trailing-newline" at least 3 times
749749
(r'^((\w+-){3,}\w+)$', 'JUNK'),
750-
750+
751751
# paths with a trailing year-like segment are NOT a year, as in
752752
# Landroid/icu/impl/IDNA2003 : treat as JUNK
753753
(r'^[^\\/]+[\\/][^\\/]+[\\/].*$', 'JUNK'),
@@ -897,7 +897,7 @@ def build_detection_from_node(
897897
# of a copyright statement
898898
(r'^neither$', 'JUNK'),
899899
(r'^nor$', 'JUNK'),
900-
900+
901901
(r'^data-.*$', 'JUNK'),
902902

903903
(r'^providing$', 'JUNK'),
@@ -1278,7 +1278,7 @@ def build_detection_from_node(
12781278
(r'^Convert$', 'NN'),
12791279
(r'^Compute$', 'NN'),
12801280
(r'^Case$', 'NN'),
1281-
1281+
12821282
(r'^END$', 'NN'),
12831283
(r'^Entity$', 'NN'),
12841284
(r'^Example', 'NN'),
@@ -2096,11 +2096,10 @@ def build_detection_from_node(
20962096

20972097
# dot in: fooo at bar dot com
20982098
(r'^dot$', 'DOT'),
2099-
2099+
21002100
# moment/moment is an odd name
21012101
(r'moment/moment$', 'NAME'),
21022102

2103-
21042103
############################################################################
21052104
# catch all other as Nouns
21062105
############################################################################
@@ -2734,6 +2733,9 @@ def build_detection_from_node(
27342733
# Copyright 2013-2020 by OCamlPro.
27352734
COPYRIGHT2: {<COPY>+ <YR-RANGE>+ <BY> <NN|NNP> } #22795
27362735
2736+
# Copyright 2018 (c) DistributedLock
2737+
COPYRIGHT: {<COPY> <YR-RANGE> <COPY> <NNP>} #230020
2738+
27372739
COPYRIGHT2: {<COPY>+ <NN|CAPS>? <YR-RANGE>+ <PN>*} #2280
27382740
27392741
COPYRIGHT2: {<COPY>+ <NN|CAPS>? <YR-RANGE>+ <NN|CAPS>* <COMPANY>?} #2300
@@ -3007,6 +3009,10 @@ def build_detection_from_node(
30073009
# Copyright OProfile authors
30083010
COPYRIGHT: {<COPY> <NN>?<NNP>+ <AUTHS>} #83004
30093011
3012+
# (C) Distributed Management Task Force (Distributed is an NN)
3013+
COPYRIGHT: {<COPY> <NN> <NAME>} #83010
3014+
3015+
30103016
#######################################
30113017
# Copyright is held by ....
30123018
#######################################
@@ -4101,7 +4107,6 @@ def prepare_text_line(line, dedeb=True, to_ascii=True):
41014107
if TRACE_TOK:
41024108
logger_debug(' prepare_text_line: after remove_printf_format_codes: ' + repr(line))
41034109

4104-
41054110
# less common comment line prefixes
41064111
line = remove_comment_markers(' ', line)
41074112
if TRACE_TOK:
@@ -4175,7 +4180,7 @@ def prepare_text_line(line, dedeb=True, to_ascii=True):
41754180
.replace('`', "'")
41764181
.replace('"', "'")
41774182
# unicode prefix in Python strings
4178-
.replace(" u'", " '")
4183+
.replace(" u'", " '")
41794184
# see https://github.com/nexB/scancode-toolkit/issues/3667
41804185
.replace('§', " ")
41814186
)

tests/cluecode/data/copyrights/distributed.hpp.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ what:
44
copyrights:
55
- Copyright 2008-2010 Gordon Woodhull
66
holders:
7-
- Gordon Woodhull
7+
- Gordon Woodhull
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Copyright (c) Distributed Webs Project, LLC. <[email protected]><br>
2+
Copyright (c) 2014 Mathias Buus
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
what:
2+
- copyrights
3+
- holders
4+
- holders_summary
5+
copyrights:
6+
- Copyright (c) Distributed Webs Project, LLC. <[email protected]>
7+
- Copyright (c) 2014 Mathias Buus
8+
holders:
9+
- Distributed Webs Project, LLC.
10+
- Mathias Buus
11+
holders_summary:
12+
- value: Distributed Webs Project, LLC.
13+
count: 1
14+
- value: Mathias Buus
15+
count: 1
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Copyright (c) Distributed Frontera developers.
2+
All rights reserved.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
what:
2+
- copyrights
3+
- holders
4+
- holders_summary
5+
copyrights:
6+
- Copyright (c) Distributed Frontera developers
7+
holders:
8+
- Distributed Frontera developers
9+
holders_summary:
10+
- value: Distributed Frontera developers
11+
count: 1
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Copyright (C) Distributed InforMation ProcBssing Ltd. Alle Rechte Vorbehalten
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
what:
2+
- copyrights
3+
- holders
4+
- holders_summary
5+
copyrights:
6+
- Copyright (c) Distributed InforMation ProcBssing Ltd.
7+
holders:
8+
- Distributed InforMation ProcBssing Ltd.
9+
holders_summary:
10+
- value: Distributed InforMation ProcBssing Ltd.
11+
count: 1
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- (c) Distributed Radio Limited 2016
2+
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
what:
2+
- copyrights
3+
- holders
4+
- holders_summary
5+
copyrights:
6+
- (c) Distributed Radio Limited 2016 - [email protected]
7+
holders:
8+
- Distributed Radio Limited
9+
holders_summary:
10+
- value: Distributed Radio Limited
11+
count: 1

0 commit comments

Comments
 (0)