@@ -806,6 +806,12 @@ def build_detection_from_node(
806806 # verbatim star
807807 (r'^\*$' , 'JUNK' ),
808808
809+ # misc company names exception to next rule
810+ (r'^TinCanTools$' , 'NNP' ),
811+ (r'^SoftwareBitMaker$' , 'NNP' ),
812+ (r'^NetCommWireless$' , 'NNP' ),
813+
814+ # Repeated CamelCasedWords
809815 (r'^([A-Z][a-z]+){3,}$' , 'JUNK' ),
810816
811817 ############################################################################
@@ -1079,7 +1085,7 @@ def build_detection_from_node(
10791085 (r'^whom$' , 'JUNK' ),
10801086 (r'^However,?$' , 'JUNK' ),
10811087 (r'^[Cc]ollectively$' , 'JUNK' ),
1082- (r'^following$' , 'JUNK ' ),
1088+ (r'^following$' , 'FOLLOWING ' ),
10831089 (r'^[Cc]onfig$' , 'JUNK' ),
10841090 (r'^file\.$' , 'JUNK' ),
10851091
@@ -1184,7 +1190,7 @@ def build_detection_from_node(
11841190 (r'^[a-z]{3,10}[A-Z][a-z]{3,10}$' , 'JUNK' ),
11851191
11861192 (r'^\$?Guid$' , 'JUNK' ),
1187- (r'^Small$' , 'NN' ),
1193+ # (r'^Small$', 'NN'),
11881194 (r'^implementing$' , 'JUNK' ),
11891195 (r'^Unlike$' , 'JUNK' ),
11901196 (r'^using$' , 'JUNK' ),
@@ -1206,6 +1212,11 @@ def build_detection_from_node(
12061212 # single period
12071213 (r"^\.$" , 'JUNK' ),
12081214
1215+ # exception to the next rule
1216+
1217+ # by PaX Team
1218+ (r"PaX$" , 'NN' ),
1219+
12091220 # short mixed caps with trailing cap: ZoY
12101221 (r"[A-Z][a-z][A-Z]$" , 'JUNK' ),
12111222
@@ -1405,6 +1416,7 @@ def build_detection_from_node(
14051416 (r'^STA$' , 'NN' ),
14061417 (r'^Page$' , 'NN' ),
14071418 (r'^Todo/Under$' , 'JUNK' ),
1419+ (r'^Under$' , 'NN' ),
14081420
14091421 (r'^Interrupt$' , 'NN' ),
14101422 (r'^cleanups?$' , 'JUNK' ),
@@ -1668,6 +1680,8 @@ def build_detection_from_node(
16681680 (r'^([Mm]onday|[Tt]uesday|[Ww]ednesday|[Tt]hursday|[Ff]riday|[Ss]aturday|[Ss]unday),?$' , 'DAY' ),
16691681 (r'^(Mon|Tue|Wed|Thu|Fri|Sat|Sun|May),?$' , 'NN' ),
16701682
1683+ (r'^[Dd]ebugging$' , 'JUNK' ),
1684+
16711685 # misc words that are not NNs
16721686 # lowercase verbs ending in "ing"
16731687 (r'^[a-z]+ing$' , 'NN' ),
@@ -1700,6 +1714,9 @@ def build_detection_from_node(
17001714 (r'^Moved$' , 'NN' ),
17011715 (r'^Phone$' , 'NN' ),
17021716
1717+ (r'^Inputs?$' , 'NN' ),
1718+
1719+
17031720 # dual caps that are not NNP
17041721 (r'^Make[A-Z]' , 'JUNK' ),
17051722 (r'^Create[A-Z]' , 'JUNK' ),
@@ -2069,6 +2086,7 @@ def build_detection_from_node(
20692086 # and Spanish/French Da Silva and De Gaulle
20702087 (r'^(([Vv][ao]n)|[Dd][aeu])$' , 'VAN' ),
20712088
2089+ (r'^aan$' , 'OF' ),
20722090 (r'^van$' , 'VAN' ),
20732091 (r'^Van$' , 'VAN' ),
20742092 (r'^von$' , 'VAN' ),
@@ -2134,7 +2152,10 @@ def build_detection_from_node(
21342152 (r'^\$?date-of-software$' , 'YR' ),
21352153 (r'^\$?date-of-document$' , 'YR' ),
21362154
2137- # cardinal numbers
2155+ # small cardinal numbers: at most two digits, first digit 0-3 (i.e. up to 39)
2156+ (r'^[0-3]?[0-9]?[\.,]?$' , 'CDS' ),
2157+
2158+ # all other cardinal numbers
21382159 (r'^-?[0-9]+(.[0-9]+)?[\.,]?$' , 'CD' ),
21392160
21402161 ############################################################################
@@ -2179,6 +2200,7 @@ def build_detection_from_node(
21792200
21802201 # exceptions to CAPS used in obfuscated emails like in joe AT foo DOT com
21812202 (r'^AT$' , 'AT' ),
2203+ (r'^AT$' , '<at>' ),
21822204 (r'^DOT$' , 'DOT' ),
21832205
21842206 # all CAPS word, at least 1 char long such as MIT, including an optional trailing comma or dot
@@ -2288,6 +2310,9 @@ def build_detection_from_node(
22882310 # some punctuation combos
22892311 (r'^(?:=>|->|<-|<=)$' , 'JUNK' ),
22902312
2313+ (r'^semiconductors?[\.,]?$' , 'NNP' ),
2314+
2315+
22912316 ############################################################################
22922317 # catch all other as Nouns
22932318 ############################################################################
@@ -2308,17 +2333,21 @@ def build_detection_from_node(
23082333
23092334 YR-RANGE: {<YR>+ <CC>+ <YR>} #20
23102335 YR-RANGE: {<YR> <DASH|TO>* <YR|BARE-YR>+} #30
2311- YR-RANGE: {<CD|BARE-YR>? <YR> <BARE-YR>?} #40
2336+ YR-RANGE: {<CD|CDS| BARE-YR>? <YR> <BARE-YR>?} #40
23122337 YR-RANGE: {<YR>+ <BARE-YR>? } #50
23132338 YR-AND: {<CC>? <YR>+ <CC>+ <YR>} #60
23142339 YR-RANGE: {<YR-AND>+} #70
23152340 YR-RANGE: {<YR-RANGE>+ <DASH|TO> <YR-RANGE>+} #71
23162341 YR-RANGE: {<YR-RANGE>+ <DASH>?} #72
23172342 # Copyright (c) 1999, 2000, 01, 03, 06 Ralf Baechle
2318- YR-RANGE: {<YR-RANGE> <CD>+} #72.2
2343+ YR-RANGE: {<YR-RANGE> <CD|CDS >+} #72.2
23192344
23202345 CD: {<BARE-YR>} #bareyear
23212346
2347+ # 5 Jan 2003
2348+ YR-RANGE: {<CDS> <NNP> <YR-RANGE>} #72.3
2349+
2350+
23222351#######################################
23232352# All/No/Some Rights Reserved
23242353#######################################
@@ -2343,6 +2372,9 @@ def build_detection_from_node(
2343237223442373 EMAIL: {<EMAIL> <NN> <EMAIL>} # email or email
23452374
2375+ # <srinivasa.deevi at conexant dot com>
2376+ EMAIL: {<EMAIL_START> <CC> <NN> <DOT> <NN> } #email with brackets
2377+
23462378#######################################
23472379# NAMES and COMPANIES
23482380#######################################
@@ -2408,8 +2440,9 @@ def build_detection_from_node(
24082440 # AT&T Laboratories, Cambridge
24092441 COMPANY: {<COMP> <COMP> <NNP>} #145
24102442
2443+ COMPANY: {<COMP> <CD|CDS> <COMP>} #170
2444+
24112445 # rare "Software in the public interest, Inc."
2412- COMPANY: {<COMP> <CD> <COMP>} #170
24132446 COMPANY: {<NNP> <IN><NN> <NNP> <NNP>+<COMP>?} #180
24142447
24152448 # Commonwealth Scientific and Industrial Research Organisation (CSIRO)
@@ -2558,18 +2591,21 @@ def build_detection_from_node(
25582591 NAME: {<NAME|NAME-EMAIL>+ <OF> <NNP> <OF> <NN>? <COMPANY>} #550
25592592 NAME: {<NAME|NAME-EMAIL>+ <CC|OF>? <NAME|NAME-EMAIL|COMPANY>} #560
25602593
2561- NAME: {<NNP><NNP>} #5611
2594+ NAME: {<NNP><NNP>} #561
25622595
25632596 # strip Software from Copyright (c) Ian Darwin 1995. Software
2564- NAME-YEAR: {<NAME>+ <YR-RANGE>} #5611
2597+ NAME-YEAR: {<NAME>+ <YR-RANGE>} #561.1
25652598
25662599 # Copyright 2018, OpenCensus Authors
2567- COPYRIGHT: {<COPY>+ <YR-RANGE> <NNP> <AUTHS>} #1579991
2600+ COPYRIGHT: {<COPY>+ <YR-RANGE> <NNP> <AUTHS>} #561.2
2601+
2602+ # Tom aan de Wiel
2603+ NAME: {<NNP> <OF> <VAN> <NNP> } # 561.3
25682604
2569- NAME-YEAR: {<YR-RANGE> <NNP>+ <CAPS>? <LINUX>?} #5612
2605+ NAME-YEAR: {<YR-RANGE> <NNP>+ <CAPS>? <LINUX>?} #562
25702606
25712607 #Academy of Motion Picture Arts and Sciences
2572- NAME: {<NAME> <CC> <NNP>} #561
2608+ NAME: {<NAME> <CC> <NNP>} #563
25732609
25742610 # Adam Weinberger and the GNOME Foundation
25752611 ANDCO: {<CC> <NN> <COMPANY>} #565
@@ -2581,6 +2617,8 @@ def build_detection_from_node(
25812617
25822618 URL: {<PARENS> <URL> <PARENS>} #5700
25832619
2620+ NAME-YEAR: {<NAME-YEAR> <CDS> <NNP>} #5700.1
2621+
25842622 #also accept trailing email and URLs
25852623 # and "VAN" e.g. Du: Copyright (c) 2008 Alek Du <[email protected] > 25862624 NAME-YEAR: {<NAME-YEAR> <VAN>? <EMAIL>?<URL>?} #5701
@@ -2591,7 +2629,7 @@ def build_detection_from_node(
25912629 NAME: {<NN|NNP|CAPS>+ <CC> <OTH>} #600
25922630 NAME: {<NNP> <CAPS>} #610
25932631 NAME: {<CAPS> <DASH>? <NNP|NAME>} #620
2594- NAME: {<NNP> <CD> <NNP>} #630
2632+ NAME: {<NNP> <CD|CDS > <NNP>} #630
25952633 NAME: {<COMP> <NAME>+} #640
25962634
25972635 # Copyright 2018-2019 @paritytech/substrate-light-ui authors & contributors
@@ -2983,7 +3021,11 @@ def build_detection_from_node(
29833021
29843022 # Russ Dill <[email protected] > 2001-2003 29853023 # Rewrited by Vladimir Oleynik <[email protected] > (C) 2003 2986- COPYRIGHT: {<NAME-EMAIL> <YR-RANGE> <AUTH2> <BY> <NAME-EMAIL> <COPY> <YR-RANGE>} #22793.5
3024+ COPYRIGHT: {<NAME-EMAIL> <YR-RANGE> <AUTH2> <BY> <NAME-EMAIL> <COPY> <YR-RANGE>} #2280-2
3025+
3026+ # Copyright (C) 2018
3027+ # Author: Jeff LaBundy <[email protected] > 3028+ COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <AUTH> <NAME-EMAIL>} #2280-3
29873029
29883030 COPYRIGHT2: {<COPY>+ <NN|CAPS>? <YR-RANGE>+ <PN>*} #2280
29893031
@@ -3106,7 +3148,7 @@ def build_detection_from_node(
31063148 COPYRIGHT: {<COPYRIGHT2> <CAPS|COMPANY> <NN|LINUX> <COMPANY>} #2008
31073149
31083150 # Copyright (c) 2016-2018 JSR 371 expert group and contributors
3109- COPYRIGHT: {<COPYRIGHT2> <CAPS> <CD> <COMPANY> <NAME>} #2009.1
3151+ COPYRIGHT: {<COPYRIGHT2> <CAPS> <CD|CDS > <COMPANY> <NAME>} #2009.1
31103152
31113153 # COPYRIGHT (c) 2006 - 2009 DIONYSOS
31123154 COPYRIGHT: {<COPYRIGHT2> <CAPS>} #2009
@@ -3235,7 +3277,7 @@ def build_detection_from_node(
32353277 COPYRIGHT: {<COPY> <NNP> <NAME-YEAR> <COMPANY>?} #15720
32363278
32373279 # Copyright (c) 2008-1010 Intel Corporation
3238- COPYRIGHT: {<COPY> <COPY> <CD> <COMPANY>} #rare-cd-not-year
3280+ COPYRIGHT: {<COPY> <COPY> <CD|CDS > <COMPANY>} #rare-cd-not-year
32393281
32403282 # Copyright (C) 2005-2006 dann frazier <[email protected] > 32413283 COPYRIGHT: {<COPYRIGHT2> <NN> <NN> <EMAIL>} #999991
@@ -3258,6 +3300,9 @@ def build_detection_from_node(
32583300 # copyrighted by the Open Source Vulnerability Database (http://osvdb.org)
32593301 COPYRIGHT: {<COPY> <BY> <NN|NNP>{3} <NAME>} #83002.1
32603302
3303+ # (C) by the respective authors,
3304+ <COPYRIGHT>: { <COPY> <BY> <NN> <NN> <AUTHDOT>} #83002.2
3305+
32613306 # weird //opylefted by <-Harvie 2oo7
32623307 COPYRIGHT: {<COPY> <BY> <NN> <NN> <MAINT>?} #83003
32633308
@@ -3301,6 +3346,14 @@ def build_detection_from_node(
33013346 # Gracenote Software, copyright © 2000-2008 Gracenote.
33023347 COPYRIGHT: {<COMPANY> <COPY>{1,2} <NAME-YEAR>} #157999.12
33033348
3349+ #Copyright (C) 2012-2016 by the following authors:
3350+ #- Wladimir J. van der Laan <[email protected] > 3351+
3352+ NAME-EMAIL: {<NNP> <NAME-EMAIL> } #157999.13
3353+ NAME-EMAIL: {<DASH> <NAME-EMAIL> <NN>?} #157999.14
3354+ COPYRIGHT: {<COPYRIGHT2> <FOLLOWING> <AUTHS> <NAME-EMAIL>+ } #157999.14
3355+
3356+
33043357#######################################
33053358# Copyright is held by ....
33063359#######################################
@@ -3412,11 +3465,11 @@ def build_detection_from_node(
34123465
34133466 COPYRIGHT: {<COMPANY><COPY>+<ALLRIGHTRESERVED>} #99900
34143467
3415- COPYRIGHT: {<COPYRIGHT|COPYRIGHT2|COPY|NAME-COPY> <COPY|NNP|AUTHDOT|CAPS|CD|YR-RANGE|NAME|NAME-EMAIL|NAME-YEAR|NAME-COPY|NAME-CAPS|AUTHORANDCO|COMPANY|YEAR|PN|COMP|UNI|CC|OF|IN|BY|OTH|VAN|URL|EMAIL|URL2|MIXEDCAP|NN>+ <ALLRIGHTRESERVED>} #99999
3468+ COPYRIGHT: {<COPYRIGHT|COPYRIGHT2|COPY|NAME-COPY> <COPY|NNP|AUTHDOT|CAPS|CD|CDS| YR-RANGE|NAME|NAME-EMAIL|NAME-YEAR|NAME-COPY|NAME-CAPS|AUTHORANDCO|COMPANY|YEAR|PN|COMP|UNI|CC|OF|IN|BY|OTH|VAN|URL|EMAIL|URL2|MIXEDCAP|NN>+ <ALLRIGHTRESERVED>} #99999
34163469
34173470 # * Copyright (C) 2004 Red Hat, Inc.
34183471 # * Copyright (C) 200 Matthias Clasen <[email protected] > 3419- COPYRIGHT: {<COPY> <COPY> <CD> <NAME-EMAIL>} #9999970
3472+ COPYRIGHT: {<COPY> <COPY> <CD|CDS > <NAME-EMAIL>} #9999970
34203473
34213474 # <p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice-20000612#Copyright">Copyright</a>
34223475 COPYRIGHT: {<COPYRIGHT> <COPY>} #9999980
@@ -3803,6 +3856,8 @@ def is_junk_copyright(s, patterns=COPYRIGHTS_JUNK_PATTERN_MATCHERS):
38033856 '$' ,
38043857 'current.year' ,
38053858 "©" ,
3859+ 'author' ,
3860+ 'authors' ,
38063861 ])
38073862))
38083863
0 commit comments