Skip to content

Commit aefb433

Browse files
authored
Merge pull request github#11738 from michaelnebel/go/maddataextensions
Go: Models as Data using extensions.
2 parents 8f9ad1f + 48d0ecc commit aefb433

File tree

22 files changed

+217
-330
lines changed

22 files changed

+217
-330
lines changed

go/ql/lib/ext/archive_tar.model.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
extensions:
2+
- addsTo:
3+
pack: codeql/go-all
4+
extensible: summaryModel
5+
data:
6+
- ["archive/tar", "", True, "FileInfoHeader", "", "", "Argument[0]", "ReturnValue[0]", "taint", "manual"]

go/ql/lib/ext/builtin.model.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
extensions:
2+
- addsTo:
3+
pack: codeql/go-all
4+
extensible: summaryModel
5+
data:
6+
- ["", "", False, "append", "", "", "Argument[0].ArrayElement", "ReturnValue.ArrayElement", "value", "manual"]
7+
- ["", "", False, "append", "", "", "Argument[1]", "ReturnValue.ArrayElement", "value", "manual"]

go/ql/lib/ext/dummy.model.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
extensions:
2+
# Make sure that the extensible model predicates are at least defined as empty.
3+
- addsTo:
4+
pack: codeql/go-all
5+
extensible: sourceModel
6+
data: []
7+
- addsTo:
8+
pack: codeql/go-all
9+
extensible: sinkModel
10+
data: []
11+
- addsTo:
12+
pack: codeql/go-all
13+
extensible: summaryModel
14+
data: []

go/ql/lib/ext/net_http.model.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
extensions:
2+
- addsTo:
3+
pack: codeql/go-all
4+
extensible: sourceModel
5+
data:
6+
- ["net/http", "Request", True, "Cookie", "", "", "ReturnValue[0]", "remote", "manual"]
7+
- ["net/http", "Request", True, "Cookies", "", "", "ReturnValue.ArrayElement", "remote", "manual"]
8+
- ["net/http", "Request", True, "FormFile", "", "", "ReturnValue[0..1]", "remote", "manual"]
9+
- ["net/http", "Request", True, "FormValue", "", "", "ReturnValue", "remote", "manual"]
10+
- ["net/http", "Request", True, "MultipartReader", "", "", "ReturnValue[0]", "remote", "manual"]
11+
- ["net/http", "Request", True, "PostFormValue", "", "", "ReturnValue", "remote", "manual"]
12+
- ["net/http", "Request", True, "Referer", "", "", "ReturnValue", "remote", "manual"]
13+
- ["net/http", "Request", True, "UserAgent", "", "", "ReturnValue", "remote", "manual"]

go/ql/lib/qlpack.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ library: true
77
upgrades: upgrades
88
dependencies:
99
codeql/tutorial: ${workspace}
10-
10+
dataExtensions:
11+
- ext/*.model.yml

go/ql/lib/semmle/go/dataflow/ExternalFlow.qll

Lines changed: 45 additions & 173 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
/**
22
* INTERNAL use only. This is an experimental API subject to change without notice.
33
*
4-
* Provides classes and predicates for dealing with flow models specified in CSV format.
4+
* Provides classes and predicates for dealing with MaD flow models specified
5+
* in data extensions and CSV format.
56
*
67
* The CSV specification has the following columns:
78
* - Sources:
8-
* `namespace; type; subtypes; name; signature; ext; output; kind`
9+
* `package; type; subtypes; name; signature; ext; output; kind; provenance`
910
* - Sinks:
10-
* `namespace; type; subtypes; name; signature; ext; input; kind`
11+
* `package; type; subtypes; name; signature; ext; input; kind; provenance`
1112
* - Summaries:
12-
* `namespace; type; subtypes; name; signature; ext; input; output; kind`
13+
* `package; type; subtypes; name; signature; ext; input; output; kind; provenance`
1314
*
1415
* The interpretation of a row is similar to API-graphs with a left-to-right
1516
* reading.
16-
* 1. The `namespace` column selects a package.
17+
* 1. The `package` column selects a package.
1718
* 2. The `type` column selects a type within that package.
1819
* 3. The `subtypes` column is a boolean that indicates whether to jump to an
1920
* arbitrary subtype of that type.
@@ -61,6 +62,7 @@
6162
*/
6263

6364
private import go
65+
private import ExternalFlowExtensions as Extensions
6466
private import internal.DataFlowPrivate
6567
private import internal.FlowSummaryImpl::Private::External
6668
private import internal.FlowSummaryImplSpecific
@@ -75,155 +77,54 @@ private module Frameworks {
7577
private import semmle.go.frameworks.Stdlib
7678
}
7779

78-
private class BuiltinModel extends SummaryModelCsv {
79-
override predicate row(string row) {
80-
row =
81-
[
82-
";;false;append;;;Argument[0].ArrayElement;ReturnValue.ArrayElement;value",
83-
";;false;append;;;Argument[1];ReturnValue.ArrayElement;value"
84-
]
85-
}
86-
}
87-
88-
/**
89-
* A unit class for adding additional source model rows.
90-
*
91-
* Extend this class to add additional source definitions.
92-
*/
93-
class SourceModelCsv extends Unit {
94-
/** Holds if `row` specifies a source definition. */
95-
abstract predicate row(string row);
96-
}
97-
98-
/**
99-
* A unit class for adding additional sink model rows.
100-
*
101-
* Extend this class to add additional sink definitions.
102-
*/
103-
class SinkModelCsv extends Unit {
104-
/** Holds if `row` specifies a sink definition. */
105-
abstract predicate row(string row);
106-
}
107-
108-
/**
109-
* A unit class for adding additional summary model rows.
110-
*
111-
* Extend this class to add additional flow summary definitions.
112-
*/
113-
class SummaryModelCsv extends Unit {
114-
/** Holds if `row` specifies a summary definition. */
115-
abstract predicate row(string row);
116-
}
117-
118-
/** Holds if `row` is a source model. */
119-
predicate sourceModel(string row) { any(SourceModelCsv s).row(row) }
120-
121-
/** Holds if `row` is a sink model. */
122-
predicate sinkModel(string row) { any(SinkModelCsv s).row(row) }
123-
124-
/** Holds if `row` is a summary model. */
125-
predicate summaryModel(string row) { any(SummaryModelCsv s).row(row) }
126-
12780
/** Holds if a source model exists for the given parameters. */
128-
predicate sourceModel(
129-
string namespace, string type, boolean subtypes, string name, string signature, string ext,
130-
string output, string kind, string provenance
131-
) {
132-
exists(string row |
133-
sourceModel(row) and
134-
row.splitAt(";", 0) = namespace and
135-
row.splitAt(";", 1) = type and
136-
row.splitAt(";", 2) = subtypes.toString() and
137-
subtypes = [true, false] and
138-
row.splitAt(";", 3) = name and
139-
row.splitAt(";", 4) = signature and
140-
row.splitAt(";", 5) = ext and
141-
row.splitAt(";", 6) = output and
142-
row.splitAt(";", 7) = kind and
143-
provenance = "manual"
144-
)
145-
}
81+
predicate sourceModel = Extensions::sourceModel/9;
14682

14783
/** Holds if a sink model exists for the given parameters. */
148-
predicate sinkModel(
149-
string namespace, string type, boolean subtypes, string name, string signature, string ext,
150-
string input, string kind, string provenance
151-
) {
152-
exists(string row |
153-
sinkModel(row) and
154-
row.splitAt(";", 0) = namespace and
155-
row.splitAt(";", 1) = type and
156-
row.splitAt(";", 2) = subtypes.toString() and
157-
subtypes = [true, false] and
158-
row.splitAt(";", 3) = name and
159-
row.splitAt(";", 4) = signature and
160-
row.splitAt(";", 5) = ext and
161-
row.splitAt(";", 6) = input and
162-
row.splitAt(";", 7) = kind and
163-
provenance = "manual"
164-
)
165-
}
84+
predicate sinkModel = Extensions::sinkModel/9;
16685

16786
/** Holds if a summary model exists for the given parameters. */
168-
predicate summaryModel(
169-
string namespace, string type, boolean subtypes, string name, string signature, string ext,
170-
string input, string output, string kind, string provenance
171-
) {
172-
exists(string row |
173-
summaryModel(row) and
174-
row.splitAt(";", 0) = namespace and
175-
row.splitAt(";", 1) = type and
176-
row.splitAt(";", 2) = subtypes.toString() and
177-
subtypes = [true, false] and
178-
row.splitAt(";", 3) = name and
179-
row.splitAt(";", 4) = signature and
180-
row.splitAt(";", 5) = ext and
181-
row.splitAt(";", 6) = input and
182-
row.splitAt(";", 7) = output and
183-
row.splitAt(";", 8) = kind
184-
) and
185-
provenance = "manual"
186-
}
87+
predicate summaryModel = Extensions::summaryModel/10;
18788

188-
/** Holds if `package` have CSV framework coverage. */
189-
private predicate packageHasCsvCoverage(string package) {
89+
/** Holds if `package` has MaD framework coverage. */
90+
private predicate packageHasMaDCoverage(string package) {
19091
sourceModel(package, _, _, _, _, _, _, _, _) or
19192
sinkModel(package, _, _, _, _, _, _, _, _) or
19293
summaryModel(package, _, _, _, _, _, _, _, _, _)
19394
}
19495

19596
/**
196-
* Holds if `package` and `subpkg` have CSV framework coverage and `subpkg`
97+
* Holds if `package` and `subpkg` have MaD framework coverage and `subpkg`
19798
* is a subpackage of `package`.
19899
*/
199100
private predicate packageHasASubpackage(string package, string subpkg) {
200-
packageHasCsvCoverage(package) and
201-
packageHasCsvCoverage(subpkg) and
101+
packageHasMaDCoverage(package) and
102+
packageHasMaDCoverage(subpkg) and
202103
subpkg.prefix(subpkg.indexOf(".")) = package
203104
}
204105

205106
/**
206-
* Holds if `package` has CSV framework coverage and it is not a subpackage of
207-
* any other package with CSV framework coverage.
107+
* Holds if `package` has MaD framework coverage and it is not a subpackage of
108+
* any other package with MaD framework coverage.
208109
*/
209110
private predicate canonicalPackage(string package) {
210-
packageHasCsvCoverage(package) and not packageHasASubpackage(_, package)
111+
packageHasMaDCoverage(package) and not packageHasASubpackage(_, package)
211112
}
212113

213114
/**
214-
* Holds if `package` and `subpkg` have CSV framework coverage, `subpkg` is a
115+
* Holds if `package` and `subpkg` have MaD framework coverage, `subpkg` is a
215116
* subpackage of `package` (or they are the same), and `package` is not a
216-
* subpackage of any other package with CSV framework coverage.
117+
* subpackage of any other package with MaD framework coverage.
217118
*/
218119
private predicate canonicalPackageHasASubpackage(string package, string subpkg) {
219120
canonicalPackage(package) and
220121
(subpkg = package or packageHasASubpackage(package, subpkg))
221122
}
222123

223124
/**
224-
* Holds if CSV framework coverage of `package` is `n` api endpoints of the
125+
* Holds if MaD framework coverage of `package` is `n` API endpoints of the
225126
* kind `(kind, part)`, and `pkgs` is the number of subpackages of `package`
226-
* which have CSV framework coverage (including `package` itself).
127+
* which have MaD framework coverage (including `package` itself).
227128
*/
228129
predicate modelCoverage(string package, int pkgs, string kind, string part, int n) {
229130
pkgs = strictcount(string subpkg | canonicalPackageHasASubpackage(package, subpkg)) and
@@ -254,8 +155,8 @@ predicate modelCoverage(string package, int pkgs, string kind, string part, int
254155
)
255156
}
256157

257-
/** Provides a query predicate to check the CSV data for validation errors. */
258-
module CsvValidation {
158+
/** Provides a query predicate to check the MaD models for validation errors. */
159+
module ModelValidation {
259160
private string getInvalidModelInput() {
260161
exists(string pred, AccessPath input, string part |
261162
sinkModel(_, _, _, _, _, _, input, _, _) and pred = "sink"
@@ -288,57 +189,25 @@ module CsvValidation {
288189
}
289190

290191
private string getInvalidModelKind() {
291-
exists(string row, string kind | summaryModel(row) |
292-
kind = row.splitAt(";", 8) and
192+
exists(string kind | summaryModel(_, _, _, _, _, _, _, _, kind, _) |
293193
not kind = ["taint", "value"] and
294194
result = "Invalid kind \"" + kind + "\" in summary model."
295195
)
296196
}
297197

298-
private string getInvalidModelSubtype() {
299-
exists(string pred, string row |
300-
sourceModel(row) and pred = "source"
301-
or
302-
sinkModel(row) and pred = "sink"
303-
or
304-
summaryModel(row) and pred = "summary"
305-
|
306-
exists(string b |
307-
b = row.splitAt(";", 2) and
308-
not b = ["true", "false"] and
309-
result = "Invalid boolean \"" + b + "\" in " + pred + " model."
310-
)
311-
)
312-
}
313-
314-
private string getInvalidModelColumnCount() {
315-
exists(string pred, string row, int expect |
316-
sourceModel(row) and expect = 8 and pred = "source"
317-
or
318-
sinkModel(row) and expect = 8 and pred = "sink"
319-
or
320-
summaryModel(row) and expect = 9 and pred = "summary"
321-
|
322-
exists(int cols |
323-
cols = 1 + max(int n | exists(row.splitAt(";", n))) and
324-
cols != expect and
325-
result =
326-
"Wrong number of columns in " + pred + " model row, expected " + expect + ", got " + cols +
327-
"."
328-
)
329-
)
330-
}
331-
332198
private string getInvalidModelSignature() {
333-
exists(string pred, string namespace, string type, string name, string signature, string ext |
334-
sourceModel(namespace, type, _, name, signature, ext, _, _, _) and pred = "source"
199+
exists(
200+
string pred, string package, string type, string name, string signature, string ext,
201+
string provenance
202+
|
203+
sourceModel(package, type, _, name, signature, ext, _, _, provenance) and pred = "source"
335204
or
336-
sinkModel(namespace, type, _, name, signature, ext, _, _, _) and pred = "sink"
205+
sinkModel(package, type, _, name, signature, ext, _, _, provenance) and pred = "sink"
337206
or
338-
summaryModel(namespace, type, _, name, signature, ext, _, _, _, _) and pred = "summary"
207+
summaryModel(package, type, _, name, signature, ext, _, _, _, provenance) and pred = "summary"
339208
|
340-
not namespace.regexpMatch("[a-zA-Z0-9_\\./]*") and
341-
result = "Dubious namespace \"" + namespace + "\" in " + pred + " model."
209+
not package.regexpMatch("[a-zA-Z0-9_\\./]*") and
210+
result = "Dubious package \"" + package + "\" in " + pred + " model."
342211
or
343212
not type.regexpMatch("[a-zA-Z0-9_\\$<>]*") and
344213
result = "Dubious type \"" + type + "\" in " + pred + " model."
@@ -351,26 +220,29 @@ module CsvValidation {
351220
or
352221
not ext.regexpMatch("|Annotated") and
353222
result = "Unrecognized extra API graph element \"" + ext + "\" in " + pred + " model."
223+
or
224+
not provenance = ["manual", "generated"] and
225+
result = "Unrecognized provenance description \"" + provenance + "\" in " + pred + " model."
354226
)
355227
}
356228

357-
/** Holds if some row in a CSV-based flow model appears to contain typos. */
229+
/** Holds if some row in a MaD flow model appears to contain typos. */
358230
query predicate invalidModelRow(string msg) {
359231
msg =
360232
[
361233
getInvalidModelSignature(), getInvalidModelInput(), getInvalidModelOutput(),
362-
getInvalidModelSubtype(), getInvalidModelColumnCount(), getInvalidModelKind()
234+
getInvalidModelKind()
363235
]
364236
}
365237
}
366238

367239
pragma[nomagic]
368240
private predicate elementSpec(
369-
string namespace, string type, boolean subtypes, string name, string signature, string ext
241+
string package, string type, boolean subtypes, string name, string signature, string ext
370242
) {
371-
sourceModel(namespace, type, subtypes, name, signature, ext, _, _, _) or
372-
sinkModel(namespace, type, subtypes, name, signature, ext, _, _, _) or
373-
summaryModel(namespace, type, subtypes, name, signature, ext, _, _, _, _)
243+
sourceModel(package, type, subtypes, name, signature, ext, _, _, _) or
244+
sinkModel(package, type, subtypes, name, signature, ext, _, _, _) or
245+
summaryModel(package, type, subtypes, name, signature, ext, _, _, _, _)
374246
}
375247

376248
private string paramsStringPart(Function f, int i) {
@@ -466,7 +338,7 @@ predicate parseContent(string component, DataFlow::Content content) {
466338
cached
467339
private module Cached {
468340
/**
469-
* Holds if `node` is specified as a source with the given kind in a CSV flow
341+
* Holds if `node` is specified as a source with the given kind in a MaD flow
470342
* model.
471343
*/
472344
cached
@@ -475,7 +347,7 @@ private module Cached {
475347
}
476348

477349
/**
478-
* Holds if `node` is specified as a sink with the given kind in a CSV flow
350+
* Holds if `node` is specified as a sink with the given kind in a MaD flow
479351
* model.
480352
*/
481353
cached

0 commit comments

Comments
 (0)