@@ -3,11 +3,45 @@ import semmle.code.cpp.File
3
3
import semmle.code.cpp.Preprocessor
4
4
5
5
/**
6
- * Holds if `c` is a comment which is usually seen in autogenerated files.
7
- * For example, comments containing 'autogenerated' or ' generated by' .
6
+ * Holds if comment `c` indicates that it might be in an auto-generated file, for
7
+ * example because it contains the text "auto-generated by".
8
8
*/
9
- predicate isAutogeneratedComment ( Comment c ) {
10
- c .getContents ( ) .regexpMatch ( "(?si).*(?:auto[ -]?generated|generated (?:by|file)|changes made in this file will be lost).*" )
9
private bindingset[comment] predicate autogeneratedComment(string comment) {
  exists(string shorthand, string expanded |
    // `shorthand` is an alternation written in a compact notation; the three
    // placeholders `generated`, `!` and ` ` are expanded into real regular
    // expression fragments below.
    shorthand =
      // "generated by", but only at the very start of the text or directly
      // after a sentence separator (i.e. not mid-sentence)
      "(^ generated by[^a-z])|" +
        "(! generated by[^a-z])|" +
        // "generated file"
        "(generated file)|" +
        // "file generated", "file is generated", "file was generated",
        // "file has been generated"
        "(file( is| was| has been)? generated)|" +
        // "changes made in this file will be lost"
        "(changes made in this file will be lost)|" +
        // "do not edit" / "don't modify" / "do not hand-edit" etc., again only
        // at the start of the text or after a sentence separator
        "(^ do(n't|nt| not) (hand-?)?(edit|modify))|" +
        "(! do(n't|nt| not) (hand-?)?(edit|modify))" and
    // The order of the substitutions matters: none of the replacement texts
    // contains a placeholder that a later substitution would rewrite again.
    expanded =
      shorthand
          // `generated` also accepts an `auto...` prefix, e.g. `auto-generated`.
          .replaceAll("generated", "(auto[\\w-]*[\\s/\\*\\r\\n]*)?generated")
          // `!` stands for an end-of-sentence / separator character.
          .replaceAll("!", "[\\.\\?\\!\\-\\;\\,]")
          // ` ` stands for one or more whitespace characters (including
          // newlines), `/` or `*`.
          .replaceAll(" ", "[\\s/\\*\\r\\n]+") and
    // (?s) lets `.` match newlines as well; (?i) makes the match case-insensitive.
    comment.regexpMatch("(?si).*(" + expanded + ").*")
  )
}
12
46
13
47
/**
@@ -25,6 +59,48 @@ predicate hasPragmaDifferentFile(File f) {
25
59
)
26
60
}
27
61
62
/**
 * Gets the line where the first comment in file `f` begins, capped at 5. This
 * allows us to skip past any preprocessor logic or similar code before the
 * first comment.
 *
 * If no comment starts within the first four lines of `f`, the result is 5.
 */
private int fileFirstComment(File f) {
  result =
    min(int line |
      exists(Comment c |
        c.getFile() = f and
        c.getLocation().getStartLine() = line and
        line < 5
      )
      or
      // Always offer the cap itself as a candidate. A `min` aggregate over an
      // empty range has no value, so without this candidate the predicate
      // would have no result at all for files whose first comment starts on
      // line 5 or later, and the cap could never take effect.
      line = 5
    )
}
75
+
76
/**
 * Gets the last line of the initial comments of file `f`: the line just before
 * the first declaration entry, preprocessor directive or namespace declaration
 * entry that comes after the line given by `fileFirstComment`. If there is no
 * such element, this is the number of lines in `f`.
 */
private int fileHeaderLimit(File f) {
  exists(int firstComment |
    firstComment = fileFirstComment(f) and
    result =
      min(int candidate |
        // Fallback: the whole file is header if no code follows.
        candidate = f.getMetrics().getNumberOfLines()
        or
        // Otherwise, the line immediately preceding some piece of code in `f`,
        // ignoring anything at or before the first comment.
        candidate > firstComment and
        (
          exists(DeclarationEntry decl, Location loc |
            loc = decl.getLocation() and
            loc.getFile() = f and
            candidate = loc.getStartLine() - 1
          )
          or
          exists(PreprocessorDirective directive, Location loc |
            loc = directive.getLocation() and
            loc.getFile() = f and
            candidate = loc.getStartLine() - 1
          )
          or
          exists(NamespaceDeclarationEntry nsDecl, Location loc |
            loc = nsDecl.getLocation() and
            loc.getFile() = f and
            candidate = loc.getStartLine() - 1
          )
        )
      )
  )
}
103
+
28
104
/**
29
105
* Holds if the file is probably an autogenerated file.
30
106
*
@@ -36,12 +112,13 @@ predicate hasPragmaDifferentFile(File f) {
36
112
*/
37
113
class AutogeneratedFile extends File {
  cached
  AutogeneratedFile() {
    // Either a pragma marks this file as standing in for a different file ...
    hasPragmaDifferentFile(this)
    or
    // ... or the header comments, joined in source order into a single string,
    // contain a phrase that is typical of generated code. `strictconcat` has
    // no value when there are no header comments, so such files do not match
    // through this branch.
    exists(string header |
      header =
        strictconcat(Comment c |
          c.getFile() = this and
          c.getLocation().getStartLine() <= fileHeaderLimit(this)
        |
          c.getContents() order by c.getLocation().getStartLine()
        ) and
      autogeneratedComment(header)
    )
  }
}
0 commit comments