Skip to content

Commit 0fdcc10

Browse files
committed
Improve copyright detection
* correct detection for genivi #17 * other refinements and new tests Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent 2ffe5e5 commit 0fdcc10

File tree

2 files changed

+40
-15
lines changed

2 files changed

+40
-15
lines changed

src/cluecode/copyrights.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,13 @@ def detect(location):
722722
#copyright notice (3dfx Interactive, Inc. 1999), (notice is JUNK)
723723
COPYRIGHT: {<COPY> <JUNK> <COMPANY> <YR-RANGE>} #2620
724724
725+
# Copyright (C) <2013>, GENIVI Alliance, Inc.
726+
COPYRIGHT: {<COPYRIGHT2> <ANDCO>} #2625
727+
728+
# copyright C 1988 by the Institute of Electrical and Electronics Engineers, Inc.
729+
COPYRIGHT: {<COPY> <PN> <YR-RANGE> <BY> <COMPANY> } #2630
730+
731+
725732
# Authors
726733
AUTH: {<AUTH2>+ <BY>} #2640
727734
AUTHOR: {<AUTH>+ <NN>? <COMPANY|NAME|YR-RANGE>* <BY>? <EMAIL>+} #2650
@@ -969,6 +976,7 @@ def is_junk(c):
969976
'(c) if you bring a patent claim against any contributor',
970977
'copyright-check writable-files m4-check author_mark_check',
971978
# 'copyrighting it yourself or claiming authorship'
979+
"copyright of uc berkeley's berkeley software distribution",
972980
])
973981
return c.lower() in junk
974982

@@ -1203,6 +1211,7 @@ def strip_markup(text):
12031211
html_tag_regex = re.compile(
12041212
r'<'
12051213
r'[(--)\?\!\%\/]?'
1214+
r'[a-zA-Z#\"\=\s\.\;\:\%\&?!,\+\*\-_\/]+'
12061215
r'[a-zA-Z0-9#\"\=\s\.\;\:\%\&?!,\+\*\-_\/]+'
12071216
r'\/?>',
12081217
re.MULTILINE | re.UNICODE

tests/cluecode/test_copyrights.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ def test_copyright_chameleon_assembly(self):
518518
def test_copyright_co_cust(self):
519519
test_file = self.get_test_loc('copyrights/copyright_co_cust-copyright_java.java')
520520
expected = [
521-
'Copyright (c) 2009 Company Customer Identity Hidden',
521+
'Copyright (c) 2009 <p> Company Customer Identity Hidden',
522522
]
523523
check_detection(expected, test_file)
524524

@@ -3441,7 +3441,7 @@ def test_copyright_regents_complex(self):
34413441
]
34423442
check_detection(expected, test_file)
34433443

3444-
@expectedFailure
3444+
#@expectedFailure
34453445
def test_copyright_regents_license(self):
34463446
test_file = self.get_test_loc('copyrights/copyright_regents_license-LICENSE')
34473447
expected = [
@@ -4072,24 +4072,14 @@ def test_copyright_rim(self):
40724072
check_detection(expected, test_lines)
40734073

40744074
def test_copyright_sinica(self):
4075-
test_lines = ['''
4076-
# Copyright (c) 1999 Computer Systems and Communication Lab,
4077-
# Institute of Information Science, Academia Sinica.
4078-
4079-
some junk
4080-
''']
4081-
expected = ['Copyright (c) 1999 Computer Systems']
4082-
check_detection(expected, test_lines)
4083-
4084-
@expectedFailure
4085-
def test_copyright_sinica_correct(self):
4086-
test_lines = ['''
4075+
test_lines = '''
40874076
# Copyright (c) 1999 Computer Systems and Communication Lab,
40884077
# Institute of Information Science, Academia Sinica.
40894078
40904079
some junk
4091-
''']
4080+
'''.splitlines()
40924081
expected = ['Copyright (c) 1999 Computer Systems and Communication Lab, Institute of Information Science, Academia Sinica.']
4082+
40934083
check_detection(expected, test_lines)
40944084

40954085
def test_copyright_copr1(self):
@@ -4203,3 +4193,29 @@ def test_copyright_word_in_html(self):
42034193
u'Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies)',
42044194
]
42054195
check_detection(expected, test_lines)
4196+
4197+
def test_copyright_with_date_in_angle_brackets(self):
4198+
test_lines = '''
4199+
* Copyright (C) <2013>, GENIVI Alliance, Inc.
4200+
4201+
'''.splitlines(False)
4202+
expected = [
4203+
u'Copyright (c) <2013> , GENIVI Alliance, Inc.',
4204+
]
4205+
check_detection(expected, test_lines, what='copyrights')
4206+
expected = [
4207+
4208+
]
4209+
check_detection(expected, test_lines, what='authors')
4210+
4211+
def test_copyright_with_zoo(self):
4212+
test_lines = '''
4213+
* Download Upload Messaging Manager
4214+
*
4215+
* Copyright (C) 2012-2013 Open-RnD Sp. z o.o. All rights reserved.
4216+
* @verbatim
4217+
'''.splitlines(False)
4218+
expected = [
4219+
u'Copyright (c) 2012-2013 Open-RnD Sp.',
4220+
]
4221+
check_detection(expected, test_lines, what='copyrights')

0 commit comments

Comments
 (0)