@@ -806,6 +806,12 @@ def build_detection_from_node(
806806 # verbatim star
807807 (r'^\*$' , 'JUNK' ),
808808
809+ # misc company names exception to next rule
810+ (r'^TinCanTools$' , 'NNP' ),
811+ (r'^SoftwareBitMaker$' , 'NNP' ),
812+ (r'^NetCommWireless$' , 'NNP' ),
813+
814+ # Repeated CamelCasedWords
809815 (r'^([A-Z][a-z]+){3,}$' , 'JUNK' ),
810816
811817 ############################################################################
@@ -1079,7 +1085,7 @@ def build_detection_from_node(
10791085 (r'^whom$' , 'JUNK' ),
10801086 (r'^However,?$' , 'JUNK' ),
10811087 (r'^[Cc]ollectively$' , 'JUNK' ),
1082- (r'^following$' , 'JUNK ' ),
1088+ (r'^following$' , 'FOLLOWING ' ),
10831089 (r'^[Cc]onfig$' , 'JUNK' ),
10841090 (r'^file\.$' , 'JUNK' ),
10851091
@@ -1184,7 +1190,7 @@ def build_detection_from_node(
11841190 (r'^[a-z]{3,10}[A-Z][a-z]{3,10}$' , 'JUNK' ),
11851191
11861192 (r'^\$?Guid$' , 'JUNK' ),
1187- (r'^Small$' , 'NN' ),
1193+ # (r'^Small$', 'NN'),
11881194 (r'^implementing$' , 'JUNK' ),
11891195 (r'^Unlike$' , 'JUNK' ),
11901196 (r'^using$' , 'JUNK' ),
@@ -1206,6 +1212,11 @@ def build_detection_from_node(
12061212 # single period
12071213 (r"^\.$" , 'JUNK' ),
12081214
1215+ # exception to the next rule
1216+
1217+ # by PaX Team
1218+ (r"PaX$" , 'NN' ),
1219+
12091220 # short mixed caps with trailing cap: ZoY
12101221 (r"[A-Z][a-z][A-Z]$" , 'JUNK' ),
12111222
@@ -1405,6 +1416,7 @@ def build_detection_from_node(
14051416 (r'^STA$' , 'NN' ),
14061417 (r'^Page$' , 'NN' ),
14071418 (r'^Todo/Under$' , 'JUNK' ),
1419+ (r'^Under$' , 'NN' ),
14081420
14091421 (r'^Interrupt$' , 'NN' ),
14101422 (r'^cleanups?$' , 'JUNK' ),
@@ -1668,6 +1680,8 @@ def build_detection_from_node(
16681680 (r'^([Mm]onday|[Tt]uesday|[Ww]ednesday|[Tt]hursday|[Ff]riday|[Ss]aturday|[Ss]unday),?$' , 'DAY' ),
16691681 (r'^(Mon|Tue|Wed|Thu|Fri|Sat|Sun|May),?$' , 'NN' ),
16701682
1683+ (r'^[Dd]ebugging$' , 'JUNK' ),
1684+
16711685 # misc words that are not NNs
16721686 # lowercase verbs ending in "ing"
16731687 (r'^[a-z]+ing$' , 'NN' ),
@@ -1700,6 +1714,9 @@ def build_detection_from_node(
17001714 (r'^Moved$' , 'NN' ),
17011715 (r'^Phone$' , 'NN' ),
17021716
1717+ (r'^Inputs?$' , 'NN' ),
1718+
1719+
17031720 # dual caps that are not NNP
17041721 (r'^Make[A-Z]' , 'JUNK' ),
17051722 (r'^Create[A-Z]' , 'JUNK' ),
@@ -2069,6 +2086,7 @@ def build_detection_from_node(
20692086 # and Spanish/French Da Silva and De Gaulle
20702087 (r'^(([Vv][ao]n)|[Dd][aeu])$' , 'VAN' ),
20712088
2089+ (r'^aan$' , 'OF' ),
20722090 (r'^van$' , 'VAN' ),
20732091 (r'^Van$' , 'VAN' ),
20742092 (r'^von$' , 'VAN' ),
@@ -2289,6 +2307,9 @@ def build_detection_from_node(
22892307 # some punctuation combos
22902308 (r'^(?:=>|->|<-|<=)$' , 'JUNK' ),
22912309
2310+ (r'^semiconductors?[\.,]?$' , 'NNP' ),
2311+
2312+
22922313 ############################################################################
22932314 # catch all other as Nouns
22942315 ############################################################################
@@ -2320,6 +2341,10 @@ def build_detection_from_node(
23202341
23212342 CD: {<BARE-YR>} #bareyear
23222343
2344+ # 5 Jan 2003
2345+ YR-RANGE: {<CD> <NNP> <YR-RANGE>} #72.3
2346+
2347+
23232348#######################################
23242349# All/No/Some Rights Reserved
23252350#######################################
@@ -2344,6 +2369,9 @@ def build_detection_from_node(
2344236923452370 EMAIL: {<EMAIL> <NN> <EMAIL>} # email or email
23462371
2372+ # <srinivasa.deevi at conexant dot com>
2373+ EMAIL: {<EMAIL_START> <CC> <NN> <DOT> <NN> } #email with brackets
2374+
23472375#######################################
23482376# NAMES and COMPANIES
23492377#######################################
@@ -2559,18 +2587,21 @@ def build_detection_from_node(
25592587 NAME: {<NAME|NAME-EMAIL>+ <OF> <NNP> <OF> <NN>? <COMPANY>} #550
25602588 NAME: {<NAME|NAME-EMAIL>+ <CC|OF>? <NAME|NAME-EMAIL|COMPANY>} #560
25612589
2562- NAME: {<NNP><NNP>} #5611
2590+ NAME: {<NNP><NNP>} #561
25632591
25642592 # strip Software from Copyright (c) Ian Darwin 1995. Software
2565- NAME-YEAR: {<NAME>+ <YR-RANGE>} #5611
2593+ NAME-YEAR: {<NAME>+ <YR-RANGE>} #561.1
25662594
25672595 # Copyright 2018, OpenCensus Authors
2568- COPYRIGHT: {<COPY>+ <YR-RANGE> <NNP> <AUTHS>} #1579991
2596+ COPYRIGHT: {<COPY>+ <YR-RANGE> <NNP> <AUTHS>} #561.2
2597+
2598+ # Tom aan de Wiel
2599+ NAME: {<NNP> <OF> <VAN> <NNP> } # 561.3
25692600
2570- NAME-YEAR: {<YR-RANGE> <NNP>+ <CAPS>? <LINUX>?} #5612
2601+ NAME-YEAR: {<YR-RANGE> <NNP>+ <CAPS>? <LINUX>?} #562
25712602
25722603 #Academy of Motion Picture Arts and Sciences
2573- NAME: {<NAME> <CC> <NNP>} #561
2604+ NAME: {<NAME> <CC> <NNP>} #563
25742605
25752606 # Adam Weinberger and the GNOME Foundation
25762607 ANDCO: {<CC> <NN> <COMPANY>} #565
@@ -2582,6 +2613,8 @@ def build_detection_from_node(
25822613
25832614 URL: {<PARENS> <URL> <PARENS>} #5700
25842615
2616+ NAME-YEAR: {<NAME-YEAR> <CD> <NNP>} #5700.1
2617+
25852618 #also accept trailing email and URLs
25862619 # and "VAN" e.g. Du: Copyright (c) 2008 Alek Du <[email protected] > 25872620 NAME-YEAR: {<NAME-YEAR> <VAN>? <EMAIL>?<URL>?} #5701
@@ -2984,7 +3017,11 @@ def build_detection_from_node(
29843017
29853018 # Russ Dill <[email protected] > 2001-2003 29863019 # Rewrited by Vladimir Oleynik <[email protected] > (C) 2003 2987- COPYRIGHT: {<NAME-EMAIL> <YR-RANGE> <AUTH2> <BY> <NAME-EMAIL> <COPY> <YR-RANGE>} #22793.5
3020+ COPYRIGHT: {<NAME-EMAIL> <YR-RANGE> <AUTH2> <BY> <NAME-EMAIL> <COPY> <YR-RANGE>} #2280-2
3021+
3022+ # Copyright (C) 2018
3023+ # Author: Jeff LaBundy <[email protected] > 3024+ COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <AUTH> <NAME-EMAIL>} #2280-3
29883025
29893026 COPYRIGHT2: {<COPY>+ <NN|CAPS>? <YR-RANGE>+ <PN>*} #2280
29903027
@@ -3259,6 +3296,9 @@ def build_detection_from_node(
32593296 # copyrighted by the Open Source Vulnerability Database (http://osvdb.org)
32603297 COPYRIGHT: {<COPY> <BY> <NN|NNP>{3} <NAME>} #83002.1
32613298
3299+ # (C) by the respective authors,
3300+ <COPYRIGHT>: { <COPY> <BY> <NN> <NN> <AUTHDOT>} #83002.2
3301+
32623302 # weird //opylefted by <-Harvie 2oo7
32633303 COPYRIGHT: {<COPY> <BY> <NN> <NN> <MAINT>?} #83003
32643304
@@ -3302,6 +3342,14 @@ def build_detection_from_node(
33023342 # Gracenote Software, copyright © 2000-2008 Gracenote.
33033343 COPYRIGHT: {<COMPANY> <COPY>{1,2} <NAME-YEAR>} #157999.12
33043344
3345+ #Copyright (C) 2012-2016 by the following authors:
3346+ #- Wladimir J. van der Laan <[email protected] > 3347+
3348+ NAME-EMAIL: {<NNP> <NAME-EMAIL> } #157999.13
3349+ NAME-EMAIL: {<DASH> <NAME-EMAIL> <NN>?} #157999.14
3350+ COPYRIGHT: {<COPYRIGHT2> <FOLLOWING> <AUTHS> <NAME-EMAIL>+ } #157999.14
3351+
3352+
33053353#######################################
33063354# Copyright is held by ....
33073355#######################################
@@ -3804,6 +3852,8 @@ def is_junk_copyright(s, patterns=COPYRIGHTS_JUNK_PATTERN_MATCHERS):
38043852 '$' ,
38053853 'current.year' ,
38063854 "©" ,
3855+ 'author' ,
3856+ 'authors' ,
38073857 ])
38083858))
38093859
0 commit comments