2525from pygmars import Token
2626from pygmars .tree import Tree
2727
28-
2928from cluecode import copyrights_hint
3029from textcode .markup import strip_known_markup_from_text
3130
@@ -107,8 +106,24 @@ def detect_copyrights(
107106 Strip markup from text if ``demarkup`` is True.
108107 Run for up to ``deadline`` seconds and return results found so far.
109108 """
109+ from cluecode .linux_credits import detect_credits_authors
110+
110111 from textcode .analysis import numbered_text_lines
111112
113+ if include_authors :
114+ author_detections = list (detect_credits_authors (location ))
115+
116+ if TRACE :
117+ logger_debug ('detect_copyrights: detect_credits_authors' )
118+ for detecta in author_detections :
119+ logger_debug (f' { detecta } ' )
120+
121+ # bail out if we have a credits file with credits
122+ if author_detections :
123+ for a in author_detections :
124+ yield a
125+ return
126+
112127 numbered_lines = list (numbered_text_lines (location , demarkup = True ))
113128
114129 if TRACE or TRACE_TOK :
@@ -661,8 +676,9 @@ def build_detection_from_node(
661676 # Slovenian: avtorske pravice
662677 # Ukrainian: авторське право
663678
664- # rare typo copyrighy
679+ # rare typos in copyright
665680 (r'^Copyrighy$' , 'COPY' ),
681+ (r'^Copyirght$' , 'COPY' ),
666682
667683 # OSGI
668684 (r'^Bundle-Copyright' , 'COPY' ),
@@ -904,6 +920,7 @@ def build_detection_from_node(
904920 (r'^[Ss]tring$' , 'JUNK' ),
905921 (r'^Implementation-Vendor$' , 'JUNK' ),
906922 (r'^dnl$' , 'JUNK' ),
923+ (r'^ifndef$' , 'JUNK' ),
907924
908925 (r'^as$' , 'NN' ),
909926 (r'^[Vv]isit$' , 'JUNK' ),
@@ -939,7 +956,6 @@ def build_detection_from_node(
939956 (r'^Add$' , 'JUNK' ),
940957 (r'^Average$' , 'JUNK' ),
941958 (r'^Taken$' , 'JUNK' ),
942- (r'^LAWS\.?$' , 'JUNK' ),
943959 (r'^design$' , 'JUNK' ),
944960 (r'^Driver$' , 'JUNK' ),
945961 (r'^[Cc]ontribution\.?' , 'JUNK' ),
@@ -949,7 +965,7 @@ def build_detection_from_node(
949965 (r'^Last-Translator$' , 'JUNK' ),
950966 (r'^Translated$' , 'JUNK' ),
951967 (r'^OMAP730$' , 'JUNK' ),
952- ( r'^Law\.$' , 'JUNK' ),
968+
953969 (r'^dylid$' , 'JUNK' ),
954970 (r'^BeOS$' , 'JUNK' ),
955971 (r'^Generates?$' , 'JUNK' ),
@@ -991,7 +1007,6 @@ def build_detection_from_node(
9911007 (r'^Disclaimer$' , 'JUNK' ),
9921008 (r'^Directive.?$' , 'JUNK' ),
9931009 (r'^LAWS\,?$' , 'JUNK' ),
994- (r'^[Ll]aws?,?$' , 'JUNK' ),
9951010 (r'^me$' , 'JUNK' ),
9961011 (r'^Derived$' , 'JUNK' ),
9971012 (r'^Limitations?$' , 'JUNK' ),
@@ -1062,7 +1077,15 @@ def build_detection_from_node(
10621077 (r'^Much$' , 'JUNK' ),
10631078 (r'^remains?,?$' , 'JUNK' ),
10641079 (r'^earlier$' , 'JUNK' ),
1065- (r'^[lL]aws?$' , 'JUNK' ),
1080+
1081+ # there is a Mr. Law
1082+ (r'^Law[\.,]?$' , 'NN' ),
1083+ (r'^laws?[\.,]?$' , 'JUNK' ),
1084+ (r'^Laws[\.,]?$' , 'JUNK' ),
1085+ (r'^LAWS?[\.,]?$' , 'JUNK' ),
1086+ (r'^LAWS?$' , 'NN' ),
1087+
1088+ (r'^taken$' , 'NN' ),
10661089 (r'^Insert$' , 'JUNK' ),
10671090 (r'^url$' , 'JUNK' ),
10681091 (r'^[Ss]ee$' , 'JUNK' ),
@@ -1083,6 +1106,7 @@ def build_detection_from_node(
10831106 (r'^[Ii]nterfaces?,?$' , 'JUNK' ),
10841107 (r'^than$' , 'JUNK' ),
10851108 (r'^whom$' , 'JUNK' ),
1109+ (r'^Definitions?$' , 'JUNK' ),
10861110 (r'^However,?$' , 'JUNK' ),
10871111 (r'^[Cc]ollectively$' , 'JUNK' ),
10881112 (r'^following$' , 'FOLLOWING' ),
@@ -1190,7 +1214,8 @@ def build_detection_from_node(
11901214 (r'^[a-z]{3,10}[A-Z][a-z]{3,10}$' , 'JUNK' ),
11911215
11921216 (r'^\$?Guid$' , 'JUNK' ),
1193- #(r'^Small$', 'NN'),
1217+ # there is a Mr Small
1218+ # (r'^Small$', 'NN'),
11941219 (r'^implementing$' , 'JUNK' ),
11951220 (r'^Unlike$' , 'JUNK' ),
11961221 (r'^using$' , 'JUNK' ),
@@ -1271,6 +1296,7 @@ def build_detection_from_node(
12711296 (r'^[Ss]tatements?.?$' , 'JUNK' ),
12721297 (r'^issues?.?$' , 'JUNK' ),
12731298 (r'^retain?.?$' , 'JUNK' ),
1299+ (r'^Sun3x$' , 'JUNK' ),
12741300
12751301 ############################################################################
12761302 # Nouns and proper Nouns
@@ -1281,7 +1307,7 @@ def build_detection_from_node(
12811307 (r'^This_file_is_part_of_KDE$' , 'NAME' ),
12821308
12831309 # K.K. (a company suffix), needs special handling
1284- (r'^K.K.,?$' , 'NAME ' ),
1310+ (r'^K.K.,?$' , 'COMP ' ),
12851311
12861312 # MIT is problematic
12871313 # With a comma, always CAPS (MIT alone is too error prone to be always tagged as CAPS
@@ -1362,6 +1388,7 @@ def build_detection_from_node(
13621388 (r'^DATED$' , 'NN' ),
13631389 (r'^Delay' , 'NN' ),
13641390 (r'^Derivative' , 'NN' ),
1391+ (r'^Direct$' , 'NN' ),
13651392 (r'^DISCLAIMED' , 'NN' ),
13661393 (r'^Docs?$' , 'NN' ),
13671394 (r'^DOCUMENTATION' , 'NN' ),
@@ -1451,10 +1478,13 @@ def build_detection_from_node(
14511478 (r'^GPLd?\.?$' , 'NN' ),
14521479 (r'^GPL\'d$' , 'NN' ),
14531480 (r'^Gnome$' , 'NN' ),
1481+ (r'^Port$' , 'NN' ),
14541482 (r'^GnuPG$' , 'NN' ),
14551483 (r'^Government.' , 'NNP' ),
14561484 (r'^OProfile$' , 'NNP' ),
14571485 (r'^Government$' , 'COMP' ),
1486+ # there is a Ms. Grant
1487+ (r'^Grant$' , 'NNP' ),
14581488 (r'^Grants?\.?,?$' , 'NN' ),
14591489 (r'^Header' , 'NN' ),
14601490 (r'^HylaFAX$' , 'NN' ),
@@ -1491,7 +1521,6 @@ def build_detection_from_node(
14911521 (r'^List$' , 'NN' ),
14921522 (r'^Set$' , 'NN' ),
14931523 (r'^Last$' , 'NN' ),
1494- (r'^LAW' , 'NN' ),
14951524 (r'^Legal$' , 'NN' ),
14961525 (r'^LegalTrademarks$' , 'NN' ),
14971526 (r'^Library$' , 'NN' ),
@@ -1644,6 +1673,11 @@ def build_detection_from_node(
16441673 (r'^CodeMirror$' , 'NN' ),
16451674 (r'^They$' , 'JUNK' ),
16461675 (r'^Branched$' , 'NN' ),
1676+ (r'^Partial$' , 'NN' ),
1677+ (r'^Fixed$' , 'NN' ),
1678+ (r'^Later$' , 'NN' ),
1679+ (r'^Rear$' , 'NN' ),
1680+ (r'^Left$' , 'NN' ),
16471681
16481682 (r'^Improved$' , 'NN' ),
16491683 (r'^Designed$' , 'NN' ),
@@ -1712,11 +1746,12 @@ def build_detection_from_node(
17121746 (r'^Compression$' , 'NN' ),
17131747 (r'^Letter$' , 'NN' ),
17141748 (r'^Moved$' , 'NN' ),
1749+ (r'^More$' , 'NN' ),
17151750 (r'^Phone$' , 'NN' ),
1751+ (r'^[Tt]ests?$' , 'JUNK' ),
17161752
17171753 (r'^Inputs?$' , 'NN' ),
17181754
1719-
17201755 # dual caps that are not NNP
17211756 (r'^Make[A-Z]' , 'JUNK' ),
17221757 (r'^Create[A-Z]' , 'JUNK' ),
@@ -1904,12 +1939,11 @@ def build_detection_from_node(
19041939 (r'^(S\.?A\.?S?|Sas|sas|A\/S|AG,?|AB|Labs?|[Cc][Oo]|Research|Center|INRIA|Societe|KG)[,\.]?$' , 'COMP' ),
19051940 # French SARL
19061941 (r'^(SARL|S\.A\.R\.L\.)[\.,\)]*$' , 'COMP' ),
1907- # More company suffix : a.s. in Czechia and otehrs
1942+ # More company suffix : a.s. in Czechia and others
19081943 (r'^(a\.s\.|S\.r\.l\.?)$' , 'COMP' ),
19091944 (r'^Vertriebsges\.m\.b\.H\.?,?$' , 'COMP' ),
19101945 # Iceland
19111946 (r'^(ehf|hf|svf|ohf)\.,?$' , 'COMP' ),
1912-
19131947 # More company abbreviations
19141948 (r'^(SPRL|srl)[\.,]?$' , 'COMP' ),
19151949 # Poland
@@ -2176,6 +2210,7 @@ def build_detection_from_node(
21762210 (r'^Meridian\'93$' , 'NNP' ),
21772211 (r'^Xiph.Org$' , 'NNP' ),
21782212 (r'^iClick,?$' , 'NNP' ),
2213+ (r'^electronics?$' , 'NNP' ),
21792214
21802215 # proper nouns with digits
21812216 (r'^([A-Z][a-z0-9]+){1,2}[\.,]?$' , 'NNP' ),
@@ -2203,6 +2238,9 @@ def build_detection_from_node(
22032238 (r'^AT$' , '<at>' ),
22042239 (r'^DOT$' , 'DOT' ),
22052240
2241+ # exceptions to CAPS
2242+ (r'^MMC$' , 'JUNK' ),
2243+
22062244 # all CAPS word, at least 1 char long such as MIT, including an optional trailing comma or dot
22072245 (r'^[A-Z0-9]+,?$' , 'CAPS' ),
22082246
@@ -2272,10 +2310,9 @@ def build_detection_from_node(
22722310 (r'__MyCompanyName__[\.,]?$' , 'NAME' ),
22732311
22742312 # email in brackets <brett_AT_jdom_DOT_org>
2275- #(karl AT indy.rr.com)
2276- #<fdlibm-comments AT sun.com>
2313+ # (karl AT indy.rr.com)
2314+ # <fdlibm-comments AT sun.com>
22772315 (r'(?i:^[<\(][\w\.\-\+]+at[\w\.\-\+]+(dot)?[\w\.\-\+]+[/)>]$)' , 'EMAIL' ),
2278-
22792316
22802317 # Code variable names including snake case
22812318 (r'^.*(_.*)+$' , 'JUNK' ),
@@ -2311,7 +2348,6 @@ def build_detection_from_node(
23112348 (r'^(?:=>|->|<-|<=)$' , 'JUNK' ),
23122349
23132350 (r'^semiconductors?[\.,]?$' , 'NNP' ),
2314-
23152351
23162352 ############################################################################
23172353 # catch all other as Nouns
@@ -2589,6 +2625,7 @@ def build_detection_from_node(
25892625 NAME-YEAR: {<YR-RANGE> <NAME-EMAIL|COMPANY>+ <CC> <YR-RANGE>} #540
25902626
25912627 NAME: {<NAME|NAME-EMAIL>+ <OF> <NNP> <OF> <NN>? <COMPANY>} #550
2628+
25922629 NAME: {<NAME|NAME-EMAIL>+ <CC|OF>? <NAME|NAME-EMAIL|COMPANY>} #560
25932630
25942631 NAME: {<NNP><NNP>} #561
@@ -2622,8 +2659,13 @@ def build_detection_from_node(
26222659 #also accept trailing email and URLs
26232660 # and "VAN" e.g. Du: Copyright (c) 2008 Alek Du <[email protected] > 26242661 NAME-YEAR: {<NAME-YEAR> <VAN>? <EMAIL>?<URL>?} #5701
2662+
2663+ # Copyright (C) 2008 Jim Law - Iris LP All rights reserved.
2664+ NAME-YEAR: {<NAME-YEAR> <NN> <DASH> <NAME>} # 5701.1
2665+
26252666 NAME-YEAR: {<NAME-YEAR>+} #5702
26262667
2668+
26272669 NAME: {<NNP> <OF> <NNP>} #580
26282670 NAME: {<NAME> <NNP>} #590
26292671 NAME: {<NN|NNP|CAPS>+ <CC> <OTH>} #600
@@ -2843,6 +2885,10 @@ def build_detection_from_node(
28432885 # Copyright (c) 2013-2015 Streams Standard Reference Implementation Authors
28442886 COPYRIGHT: {<COPY>+ <NAME-YEAR> <NN|NNP>+ <AUTHS>} #1566
28452887
2888+ # Nicolas Pitre, (c) 2002 Monta Vista Software Inc
2889+ # Cliff Brake, (c) 2001
2890+ #COPYRIGHT: {<NAME> <COPY> <NAME-YEAR> <NAME> <COPY> <YR-RANGE>} #1566.1
2891+
28462892 # copyright: Copyright (c) Joe Joyce and contributors, 2016-2019.
28472893 COPYRIGHT: {<COPY>+ <NAME> <CC> <NN> <YR-RANGE>} #1579992
28482894
@@ -3027,8 +3073,11 @@ def build_detection_from_node(
30273073 # Author: Jeff LaBundy <[email protected] > 30283074 COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <AUTH> <NAME-EMAIL>} #2280-3
30293075
3076+
30303077 COPYRIGHT2: {<COPY>+ <NN|CAPS>? <YR-RANGE>+ <PN>*} #2280
30313078
3079+ COPYRIGHT: {<COPYRIGHT2> <BY> <NAME-YEAR|NAME-EMAIL> <BY>? <NAME-YEAR|NAME-EMAIL>? } #2280-4
3080+
30323081 # using #2280 above: Copyright 2018 Developers of the Rand project
30333082 COPYRIGHT: {<COPYRIGHT2> <MAINT> <OF> <COMPANY>} #2280.123
30343083
@@ -3151,7 +3200,8 @@ def build_detection_from_node(
31513200 COPYRIGHT: {<COPYRIGHT2> <CAPS> <CD|CDS> <COMPANY> <NAME>} #2009.1
31523201
31533202 # COPYRIGHT (c) 2006 - 2009 DIONYSOS
3154- COPYRIGHT: {<COPYRIGHT2> <CAPS>} #2009
3203+ # Copyright 2003 ICT CAS
3204+ COPYRIGHT: {<COPYRIGHT2> <CAPS>+} #2009
31553205
31563206 # Copyright (C) 2000 See Beyond Communications Corporation
31573207 COPYRIGHT2: {<COPYRIGHT2> <JUNK> <COMPANY>} # 2010
@@ -3349,7 +3399,7 @@ def build_detection_from_node(
33493399 #Copyright (C) 2012-2016 by the following authors:
33503400 #- Wladimir J. van der Laan <[email protected] > 33513401
3352- NAME-EMAIL: {<NNP> <NAME-EMAIL> } #157999.13
3402+ NAME-EMAIL: {<NNP> <NAME-EMAIL> } #157999.13
33533403 NAME-EMAIL: {<DASH> <NAME-EMAIL> <NN>?} #157999.14
33543404 COPYRIGHT: {<COPYRIGHT2> <FOLLOWING> <AUTHS> <NAME-EMAIL>+ } #157999.14
33553405
@@ -3888,6 +3938,8 @@ def is_junk_copyright(s, patterns=COPYRIGHTS_JUNK_PATTERN_MATCHERS):
38883938 'a' ,
38893939 '</p>' ,
38903940 'or' ,
3941+ 'taken' ,
3942+ 'from' ,
38913943])
38923944
38933945# these final holders are ignored.
@@ -4398,7 +4450,7 @@ def remove_code_comment_markers(s):
43984450 Return ``s`` removing code comments such as C and C++ style comment markers and assimilated
43994451
44004452 >>> remove_code_comment_markers(r"\\ *#%; /\\ /*a*/b/*c\\ d#e%f \\ *#%; /")
4401- 'a b c\\ \d e f'
4453+ 'a b c\\ \\ d e f'
44024454 """
44034455 return (s
44044456 .replace ('/*' , ' ' )
@@ -4474,7 +4526,7 @@ def prepare_text_line(line):
44744526 .replace ('\\ XA9' , ' (c) ' )
44754527 .replace ('\\ A9' , ' (c) ' )
44764528 .replace ('\\ a9' , ' (c) ' )
4477- .replace ('<A9>' , ' (c) ' )
4529+ .replace ('<A9>' , ' (c) ' )
44784530 .replace ('XA9;' , ' (c) ' )
44794531 .replace ('Xa9;' , ' (c) ' )
44804532 .replace ('xA9;' , ' (c) ' )
@@ -4525,7 +4577,7 @@ def prepare_text_line(line):
45254577 .replace ('year>' , " " )
45264578 .replace ('<year>' , " " )
45274579 .replace ('<name>' , " " )
4528-
4580+
45294581 )
45304582
45314583 if TRACE_TOK :
0 commit comments