Skip to content

Commit e623ef6

Browse files
committed
Improve copyright detection
- Allow multiline copyright detection with an empty line if ending with a conjunction or a year.
- Do not treat Free as an NNP.
- Better handle leading and trailing comment line markers.
- Always enable tracing with deep tracing.

Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent 868d203 commit e623ef6

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

src/cluecode/copyrights.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
TRACE_DEEP = 0
3232
if os.environ.get('SCANCODE_DEBUG_COPYRIGHT_DEEP'):
3333
TRACE_DEEP = 1
34+
TRACE = False
3435

3536
TRACE_TOK = False or os.environ.get('SCANCODE_DEBUG_COPYRIGHT_TOKEN', False)
3637

@@ -1163,6 +1164,9 @@ def from_node(
11631164
(r'^Engine\.$', 'NN'),
11641165
(r'^While$', 'NN'),
11651166

1167+
# alone this is not enough for an NNP
1168+
(r'^Free$', 'NN'),
1169+
11661170
# Hours/Date/Day/Month text references
11671171
(r'^am$', 'NN'),
11681172
(r'^pm$', 'NN'),
@@ -1736,7 +1740,7 @@ def from_node(
17361740
COMPANY: {<BY>? <NN> <NNP> <OF> <NN> <UNI> <OF> <COMPANY|NAME|NAME-EMAIL><COMP>?} #130
17371741
17381742
# Free Software Foundation, Inc.
1739-
COMPANY: {<NNP> <NNP> <COMP> <COMP>} #135
1743+
COMPANY: {<NN|NNP> <NNP> <COMP> <COMP>} #135
17401744
17411745
# Mediatrix Telecom, inc. <[email protected]>
17421746
COMPANY: {<NNP>+ <COMP> <EMAIL>} #136
@@ -2667,6 +2671,7 @@ def refine_names(s, prefixes):
26672671
'author',
26682672
'all',
26692673
'some',
2674+
'and'
26702675
])
26712676

26722677
# Set of statements that get detected and are junk/false positive
@@ -3404,15 +3409,14 @@ def prepare_text_line(line, dedeb=True, to_ascii=True):
34043409
)
34053410
line = remove_printf_format_codes(' ', line)
34063411

3407-
# un common comment line prefixes
3412+
# less common comment line prefixes
34083413
line = remove_comment_markers(' ', line)
34093414
line = remove_man_comment_markers(' ', line)
34103415

34113416
line = (line
3412-
# C and C++ style markers
3413-
.replace('^//', ' ')
3417+
# C and C++ style comment markers
34143418
.replace('/*', ' ').replace('*/', ' ')
3415-
3419+
.strip().strip('/*#')
34163420
# un common pipe chars in some ascii art
34173421
.replace('|', ' ')
34183422

0 commit comments

Comments
 (0)