@@ -815,6 +815,9 @@ def build_detection_from_node(
815815 (r'^@?link:?$' , 'JUNK' ),
816816 (r'@license:?$' , 'JUNK' ),
817817
818+ # hex numbers are JUNK, such as 0x3fc3/0x7cff
819+ (r'^0x[a-fA-F0-9]+' , 'JUNK' ),
820+
818821 # found in crypto certificates and LDAP
819822 (r'^O=$' , 'JUNK' ),
820823 (r'^OU=?$' , 'JUNK' ),
@@ -1045,7 +1048,7 @@ def build_detection_from_node(
10451048 (r'^Owner$' , 'JUNK' ),
10461049 (r'^behalf$' , 'JUNK' ),
10471050 (r'^know-how$' , 'JUNK' ),
1048- (r'^interfaces ?,?$' , 'JUNK' ),
1051+ (r'^[Ii]nterfaces ?,?$' , 'JUNK' ),
10491052 (r'^than$' , 'JUNK' ),
10501053 (r'^whom$' , 'JUNK' ),
10511054 (r'^However,?$' , 'JUNK' ),
@@ -1206,6 +1209,10 @@ def build_detection_from_node(
12061209 (r'^False.?$' , 'JUNK' ),
12071210 (r'^True.?$' , 'JUNK' ),
12081211
1212+ (r'^high$' , 'JUNK' ),
1213+ (r'^low$' , 'JUNK' ),
1214+ (r'^on$' , 'JUNK' ),
1215+
12091216 (r'^imports?$' , 'JUNK' ),
12101217 (r'^[Ww]arnings?$' , 'JUNK' ),
12111218 (r'^[Ww]hether$' , 'JUNK' ),
@@ -1303,7 +1310,7 @@ def build_detection_from_node(
13031310 (r'^Code$' , 'NN' ),
13041311 (r'^Collators?$' , 'NN' ),
13051312 (r'^Commercial' , 'NN' ),
1306- (r'^Commons$' , 'NN' ),
1313+ (r'^Commons? $' , 'NN' ),
13071314 # TODO: Compilation could be JUNK?
13081315 (r'^Compilation' , 'NN' ),
13091316 (r'^Contact' , 'NN' ),
@@ -1357,6 +1364,16 @@ def build_detection_from_node(
13571364 (r'^With$' , 'NN' ),
13581365 (r'^Tick$' , 'NN' ),
13591366 (r'^Dynamic$' , 'NN' ),
1367+ (r'^Battery$' , 'NN' ),
1368+ (r'^Charger$' , 'NN' ),
1369+ (r'^Dynamic$' , 'NN' ),
1370+ (r'^Bugfixes?$' , 'NN' ),
1371+ (r'^Likes?$' , 'NN' ),
1372+ (r'^STA$' , 'NN' ),
1373+
1374+ (r'^Interrupt$' , 'NN' ),
1375+ (r'^cleanups?$' , 'JUNK' ),
1376+ (r'^Tape$' , 'NN' ),
13601377
13611378 (r'^When$' , 'NN' ),
13621379 # (r'^Owner$', 'NN'),
@@ -1384,8 +1401,8 @@ def build_detection_from_node(
13841401 (r'^Gaim$' , 'NN' ),
13851402 (r'^Generated' , 'NN' ),
13861403 (r'^Glib$' , 'NN' ),
1387- (r'^GPLd' , 'NN' ),
1388- (r'^GPL\'d' , 'NN' ),
1404+ (r'^GPLd?\.?$ ' , 'NN' ),
1405+ (r'^GPL\'d$ ' , 'NN' ),
13891406 (r'^Gnome$' , 'NN' ),
13901407 (r'^GnuPG$' , 'NN' ),
13911408 (r'^Government.' , 'NNP' ),
@@ -1477,7 +1494,7 @@ def build_detection_from_node(
14771494 (r'^POSIX$' , 'NN' ),
14781495 (r'^Possible' , 'NN' ),
14791496 (r'^Powered$' , 'NN' ),
1480- (r'^defined$' , 'NN ' ),
1497+ (r'^defined? $' , 'JUNK ' ),
14811498 (r'^Predefined$' , 'NN' ),
14821499 (r'^Promise$' , 'NN' ),
14831500 (r'^Products?\.?$' , 'NN' ),
@@ -1576,7 +1593,7 @@ def build_detection_from_node(
15761593 (r'^Branched$' , 'NN' ),
15771594
15781595 (r'^Improved$' , 'NN' ),
1579- (r'^Designed $' , 'NN' ),
1596+ (r'^Designe[dr] $' , 'NN' ),
15801597 (r'^Organised$' , 'NN' ),
15811598 (r'^Re-organised$' , 'NN' ),
15821599 (r'^Swap$' , 'NN' ),
@@ -1897,6 +1914,9 @@ def build_detection_from_node(
18971914 # et al.
18981915 (r'^al\.$' , 'AUTHDOT' ),
18991916
1917+ # in Linux LKMs
1918+ (r'^MODULEAUTHOR$' , 'AUTH' ),
1919+
19001920 # Contributor(s)
19011921 (r'^[Cc]ontributors[,\.]?$' , 'CONTRIBUTORS' ),
19021922 (r'^Contributor[,\.]?$' , 'NN' ),
@@ -3548,7 +3568,8 @@ def refine_names(s, prefixes):
35483568 r'^\(c\) Object c$' ,
35493569 r'^copyright headers?' ,
35503570 r'Copyright \(c\) 2021 Dot' ,
3551- r'^\(c\) \(c\) B$'
3571+ r'^\(c\) \(c\) B$' ,
3572+ r'^\(c\) group$' ,
35523573]
35533574
35543575# a collection of junk matcher callables
@@ -3591,6 +3612,7 @@ def is_junk_copyryright(s, patterns=COPYRIGHTS_JUNK_PATTERN_MATCHERS):
35913612 'mailto:' ,
35923613 "name'" ,
35933614 "a" ,
3615+ "moduleauthor" ,
35943616 ])
35953617))
35963618
@@ -4301,6 +4323,8 @@ def prepare_text_line(line, dedeb=True, to_ascii=True):
43014323 # C and C++ style comment markers
43024324 .replace ('/*' , ' ' ).replace ('*/' , ' ' )
43034325 .strip ().strip ('/*#' )
4326+ # in rst
4327+ .replace ('|copy|' , ' (c) ' )
43044328 # uncommon pipe chars in some ascii art
43054329 .replace ('|' , ' ' )
43064330
0 commit comments