Skip to content

Commit 5215ef4

Browse files
committed
Improve misc. copyright detections
Spotted in some common Python libraries such as NumPy and SciPy. Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent 54b2309 commit 5215ef4

File tree

3 files changed

+108
-2
lines changed

3 files changed

+108
-2
lines changed

src/cluecode/copyrights.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1140,6 +1140,16 @@ def build_detection_from_node(
11401140
(r'^[MmNn]odules?[,\.]?$', 'JUNK'),
11411141
(r'^[Rr]eturned$', 'JUNK'),
11421142

1143+
# misc junk
1144+
(r'^False.?$', 'JUNK'),
1145+
(r'^True.?$', 'JUNK'),
1146+
1147+
(r'^imports?$', 'JUNK'),
1148+
(r'^[Ww]arnings?$', 'JUNK'),
1149+
(r'^[Ww]hether$', 'JUNK'),
1150+
(r'^[Bb]oth$', 'JUNK'),
1151+
(r'^[Cc]aller$', 'JUNK'),
1152+
11431153
# tags
11441154
(r'^E-?[Mm]ail:?$', 'JUNK'),
11451155
(r'^URL:?$', 'JUNK'),
@@ -1252,6 +1262,12 @@ def build_detection_from_node(
12521262
(r'^Every$', 'NN'),
12531263
(r'^Digitized', 'NN'),
12541264
(r'^[Ds]istributed?.?$', 'NN'),
1265+
1266+
(r'^Multiply$', 'NN'),
1267+
(r'^Convert$', 'NN'),
1268+
(r'^Compute$', 'NN'),
1269+
(r'^Case$', 'NN'),
1270+
12551271
(r'^END$', 'NN'),
12561272
(r'^Entity$', 'NN'),
12571273
(r'^Example', 'NN'),
@@ -3262,7 +3278,7 @@ def refine_names(s, prefixes):
32623278
r'^copyright \(c\)$',
32633279
r'^\(c\) by$',
32643280

3265-
r"\(c\) [A-Z][a-z] \(c\)",
3281+
r"\(c\) [a-z][a-z] \(c\)",
32663282
r"^copyright holder or simply",
32673283
r"^copyright notice\.",
32683284
r"^copyright of uc berkeley's berkeley software distribution",
@@ -3326,6 +3342,8 @@ def refine_names(s, prefixes):
33263342
r'^u\.s\. copyright act',
33273343
r'^\(c\) Object c$',
33283344
r'^copyright headers?',
3345+
r'Copyright \(c\) 2021 Dot',
3346+
r'^\(c\) \(c\) B$'
33293347
]
33303348

33313349
# a collection of junk junk matcher callables
@@ -3551,7 +3569,7 @@ def remove_dupe_copyright_words(c):
35513569

35523570
def remove_some_extra_words_and_punct(c):
35533571
"""
3554-
Remove misc junk includein some punctuations
3572+
Remove misc junk including some punctuations
35553573
"""
35563574
c = c.replace('<p>', ' ')
35573575
c = c.replace('<a href', ' ')
@@ -4135,6 +4153,8 @@ def prepare_text_line(line, dedeb=True, to_ascii=True):
41354153
# backticks ` and "
41364154
.replace('`', "'")
41374155
.replace('"', "'")
4156+
# "u" unicode prefix in Python strings
4157+
.replace(" u'", " '")
41384158
# see https://github.com/nexB/scancode-toolkit/issues/3667
41394159
.replace('§', " ")
41404160
)
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
copyright u'2017-2018, NumPy Developers
2+
3+
4+
5+
Copyright (c) 2017
6+
(c) Convert Chebyshev
7+
(c) Multiply a Hermite
8+
(c) Multiply a Laguerre
9+
(c) Multiply a Legendre
10+
(c) Multiply a Chebyshev
11+
12+
(c), True, True, False, False
13+
14+
(c), False, False, False, True
15+
(c), False, False, True, False
16+
17+
(c), (c)
18+
(c) . B' Both
19+
(c) B Whether
20+
21+
22+
23+
Copyright 2001
24+
25+
Copyright 2003
26+
27+
Copyright 2008
28+
29+
30+
31+
(c) 2003, C. Bond
32+
33+
(c) Case 2 Caller
34+
35+
(c) Compute Hessian H
36+
37+
# Author: Jake Vanderplas -- <[email protected]>
38+
# License: BSD 3 clause (C) 2011
39+
import warnings
40+
41+
42+
# Copyright (c) 2011, 2012
43+
# Authors: Pietro Berkes,
44+
45+
46+
47+
# Author: Jake Vanderplas -- <[email protected]>
48+
# License: BSD 3 clause (C) 2011
49+
50+
import numpy as np
51+
52+
53+
54+
# extra quote
55+
56+
copyright = u"2010-2020, Benjamin Peterson"
57+
58+
59+
(c) KOKOKOKOKKuKyKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKxKyK KyKxKzKzKzKzKzKzKzKzKzKzKzKzKyKxKzKzKzKzKzKzKzKzKzKzKzKzKzKzKzKzKzKzKzKzKzKzKzKzK K K KzK K
60+
61+
62+
63+
# junk in Pyparsing
64+
Don't get excited!
65+
I said "Don't get excited!"
66+
Copyright © 2021
67+
Dot ⟶ ˙
68+
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
what:
2+
- copyrights
3+
- holders
4+
copyrights:
5+
- copyright 2017-2018, NumPy Developers
6+
- Copyright (c) 2017
7+
- Copyright 2001
8+
- Copyright 2003
9+
- Copyright 2008
10+
- (c) 2003, C. Bond
11+
- (c) 2011
12+
- Copyright (c) 2011, 2012
13+
- (c) 2011
14+
- copyright 2010-2020, Benjamin Peterson
15+
holders:
16+
- NumPy Developers
17+
- C. Bond
18+
- Benjamin Peterson

0 commit comments

Comments
 (0)