Skip to content

Commit 36efea3

Browse files
committed
Copyright detection improvements
* Correct detection for #168 with some new exceptions to proper nouns
* Test is now passing for #191
* New test for #130

Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent 20207d6 commit 36efea3

File tree

2 files changed

+47
-6
lines changed

2 files changed

+47
-6
lines changed

src/cluecode/copyrights.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,9 @@ def detect(location):
346346
# exceptions to proper nouns
347347
(r'^(The|Commons|AUTHOR|software)$', 'NN'),
348348

349+
# exceptions to proper noun
350+
(r"^(Natural|Docs?)$", 'NN'),
351+
349352
# composed proper nouns, ie. Jean-Claude or ST-Microelectronics
350353
# FIXME: what about a variant with spaces around the dash?
351354
(r'^[A-Z][a-zA-Z]*\s?[\-]\s?[A-Z]?[a-zA-Z]+.?$', 'NNP'),
@@ -571,6 +574,9 @@ def detect(location):
571574
# XZY emails
572575
COMPANY: {<COMPANY> <EMAIL>+} #1400
573576
577+
# by the a href http wtforms.simplecodes.com WTForms Team
578+
COMPANY: {<BY> <NN>+ <COMP|COMPANY>} #1420
579+
574580
575581
# "And" some name
576582
ANDCO: {<CC>+ <NN> <NNP>+<UNI|COMP>?} #1430

tests/cluecode/test_copyrights.py

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3966,10 +3966,9 @@ def test_copyright_should_not_contain_leading_or_trailing_colon(self):
39663966
expected = ['copyright (c) 2013 by Armin Ronacher.']
39673967
check_detection(expected, test_file)
39683968

3969-
@expectedFailure
39703969
def test_copyright_in_markup_should_not_be_truncated(self):
39713970
test_file = self.get_test_loc('copyrights/copyright_in_html.html')
3972-
expected = ['(c) Copyright 2010 by the WTForms Team']
3971+
expected = ["(c) Copyright 2010 by the <a href http://wtforms.simplecodes.com'>WTForms Team"]
39733972
check_detection(expected, test_file)
39743973

39753974
def test_copyright_should_not_have_trailing_garbage(self):
@@ -4138,24 +4137,60 @@ def test_copyright_copr5_correct(self):
41384137

41394138
def test_copyright_oracle(self):
41404139
test_lines = ['Copyright (c) 1997-2015 Oracle and/or its affiliates. All rights reserved.']
4141-
41424140
expected = ['Copyright (c) 1997-2015 Oracle and/or its affiliates.']
41434141
check_detection(expected, test_lines)
41444142

41454143
def test_copyright_windows(self):
41464144
test_lines = ['This release supports NT-based Windows releases like Windows 2000 SP4, Windows XP, and Windows 2003.']
4147-
41484145
expected = []
41494146
check_detection(expected, test_lines)
41504147

41514148
def test_copyright_in_binary_sql_server(self):
41524149
test_lines = ['2005charchar? 7 DDLSQL Server 2005smalldatetimedatetimeLDDDDDD7']
4153-
41544150
expected = []
41554151
check_detection(expected, test_lines)
41564152

41574153
def test_copyright_with_example_com_url(self):
41584154
test_lines = ['"domain": function(c) { assert.equal(c.domain, "example.com") },']
4159-
41604155
expected = []
41614156
check_detection(expected, test_lines)
4157+
4158+
def test_copyright_various(self):
4159+
test_lines = '''
4160+
libwmf (<libwmf/api.h>): library for wmf conversion
4161+
Copyright (C) 2000 - various; see CREDITS, ChangeLog, and sources
4162+
The libwmf Library is free software; you can redistribute it and/or
4163+
'''.splitlines(False)
4164+
expected = ['Copyright (c) 2000 - various'] # ; see CREDITS, ChangeLog, and sources
4165+
check_detection(expected, test_lines)
4166+
4167+
def test_copyright_natural_docs(self):
4168+
test_lines = '''
4169+
// Search script generated by doxygen
4170+
// Copyright (C) 2009 by Dimitri van Heesch.
4171+
4172+
// The code in this file is loosly based on main.js, part of Natural Docs,
4173+
// which is Copyright (C) 2003-2008 Greg Valure
4174+
// Natural Docs is licensed under the GPL.
4175+
'''.splitlines(False)
4176+
expected = [
4177+
u'Copyright (c) 2009 by Dimitri van Heesch.',
4178+
u'Copyright (c) 2003-2008 Greg Valure'
4179+
]
4180+
check_detection(expected, test_lines)
4181+
4182+
def test_copyright_and_authors_mixed(self):
4183+
test_lines = '''
4184+
* Copyright (c) 1998 Softweyr LLC. All rights reserved.
4185+
*
4186+
* strtok_r, from Berkeley strtok
4187+
* Oct 13, 1998 by Wes Peters <[email protected]>
4188+
*
4189+
* Copyright (c) 1988, 1993
4190+
* The Regents of the University of California. All rights reserved.
4191+
'''.splitlines(False)
4192+
expected = [
4193+
u'Copyright (c) 1998 Softweyr LLC.',
4194+
u'Copyright (c) 1988, 1993 The Regents of the University of California.'
4195+
]
4196+
check_detection(expected, test_lines)

0 commit comments

Comments
 (0)