Skip to content

Commit 52bc43b

Browse files
authored
Merge pull request github#12595 from michaelnebel/enhanceprovenance
Java/C# : Enhance provenance.
2 parents 8c46bfd + 169d8d5 commit 52bc43b

File tree

107 files changed

+107256
-106710
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

107 files changed

+107256
-106710
lines changed

config/identical-files.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,10 @@
123123
"java/ql/src/utils/modelgenerator/internal/CaptureModels.qll",
124124
"csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll"
125125
],
126+
"Model as Data Generation Java/C# - CaptureModelsPrinting": [
127+
"java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll",
128+
"csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll"
129+
],
126130
"Sign Java/C#": [
127131
"java/ql/lib/semmle/code/java/dataflow/internal/rangeanalysis/Sign.qll",
128132
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/rangeanalysis/Sign.qll"
@@ -596,4 +600,4 @@
596600
"python/ql/lib/semmle/python/security/internal/EncryptionKeySizes.qll",
597601
"java/ql/lib/semmle/code/java/security/internal/EncryptionKeySizes.qll"
598602
]
599-
}
603+
}

csharp/ql/lib/ext/generated/dotnet_runtime.model.yml

Lines changed: 51924 additions & 51924 deletions
Large diffs are not rendered by default.

csharp/ql/lib/semmle/code/csharp/dataflow/ExternalFlow.qll

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,15 @@
7373
* sources "remote" indicates a default remote flow source, and for summaries
7474
* "taint" indicates a default additional taint step and "value" indicates a
7575
* globally applicable value-preserving step.
76-
* 9. The `provenance` column is a tag to indicate the origin of the summary.
77-
* There are two supported values: "generated" and "manual". "generated" means that
78-
* the model has been emitted by the model generator tool and "manual" means
79-
* that the model has been written by hand. This information is used in a heuristic
80-
* for dataflow analysis to determine, if a model or source code should be used for
81-
* determining flow.
76+
* 9. The `provenance` column is a tag to indicate the origin and verification of a model.
77+
* The format is {origin}-{verification} or just "manual" where the origin describes
78+
* the origin of the model and verification describes how the model has been verified.
79+
* Some examples are:
80+
* - "df-generated": The model has been generated by the model generator tool.
81+
* - "df-manual": The model has been generated by the model generator and verified by a human.
82+
* - "manual": The model has been written by hand.
83+
* This information is used in a heuristic for dataflow analysis to determine whether a
84+
* model or source code should be used for determining flow.
8285
*/
8386

8487
import csharp
@@ -248,7 +251,7 @@ module ModelValidation {
248251
not ext.regexpMatch("|Attribute") and
249252
result = "Unrecognized extra API graph element \"" + ext + "\" in " + pred + " model."
250253
or
251-
not provenance = ["manual", "generated"] and
254+
invalidProvenance(provenance) and
252255
result = "Unrecognized provenance description \"" + provenance + "\" in " + pred + " model."
253256
)
254257
}

csharp/ql/lib/semmle/code/csharp/dataflow/FlowSummary.qll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,6 @@ private class RecordConstructorFlow extends SummarizedCallable {
137137
preservesValue = true
138138
)
139139
}
140-
141-
override predicate hasProvenance(string provenance) { provenance = "manual" }
142140
}
143141

144142
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;

csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowDispatch.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ class DataFlowSummarizedCallable instanceof FlowSummary::SummarizedCallable {
9595
DataFlowSummarizedCallable() {
9696
not this.fromSource()
9797
or
98-
this.fromSource() and not this.isAutoGenerated()
98+
this.fromSource() and not this.applyGeneratedModel()
9999
}
100100

101101
string toString() { result = super.toString() }

csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll

Lines changed: 97 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,54 @@ module Public {
215215
abstract predicate required(SummaryComponent head, SummaryComponentStack tail);
216216
}
217217

218+
/**
219+
* Gets the valid model origin values.
220+
*/
221+
private string getValidModelOrigin() {
222+
result =
223+
[
224+
"ai", // AI (machine learning)
225+
"df", // Dataflow (model generator)
226+
"tb", // Type based (model generator)
227+
"hq", // Heuristic query
228+
]
229+
}
230+
231+
/**
232+
* A class used to represent provenance values for MaD models.
233+
*
234+
* The provenance value is a string of the form `origin-verification`
235+
* (or just `manual`), where `origin` is a value indicating the
236+
* origin of the model, and `verification` is a value indicating how
237+
* the model was verified.
238+
*
239+
* Examples could be:
240+
* - `df-generated`: A model produced by the model generator, but not verified by a human.
241+
* - `ai-manual`: A model produced by AI, but verified by a human.
242+
*/
243+
class Provenance extends string {
244+
private string verification;
245+
246+
Provenance() {
247+
exists(string origin | origin = getValidModelOrigin() |
248+
this = origin + "-" + verification and
249+
verification = ["manual", "generated"]
250+
)
251+
or
252+
this = verification and verification = "manual"
253+
}
254+
255+
/**
256+
* Holds if this is a valid generated provenance value.
257+
*/
258+
predicate isGenerated() { verification = "generated" }
259+
260+
/**
261+
* Holds if this is a valid manual provenance value.
262+
*/
263+
predicate isManual() { verification = "manual" }
264+
}
265+
218266
/** A callable with a flow summary. */
219267
abstract class SummarizedCallable extends SummarizedCallableBase {
220268
bindingset[this]
@@ -248,41 +296,61 @@ module Public {
248296
}
249297

250298
/**
251-
* Holds if all the summaries that apply to `this` are auto generated and not manually created.
299+
* Holds if there exists a generated summary that applies to this callable.
252300
*/
253-
final predicate isAutoGenerated() {
254-
this.hasProvenance(["generated", "ai-generated"]) and not this.isManual()
301+
final predicate hasGeneratedModel() {
302+
exists(Provenance p | p.isGenerated() and this.hasProvenance(p))
255303
}
256304

257305
/**
258-
* Holds if there exists a manual summary that applies to `this`.
306+
* Holds if all the summaries that apply to this callable are auto generated and not manually created.
307+
* That is, generated models are only applied when there are no manual models.
259308
*/
260-
final predicate isManual() { this.hasProvenance("manual") }
309+
final predicate applyGeneratedModel() {
310+
this.hasGeneratedModel() and
311+
not this.hasManualModel()
312+
}
261313

262314
/**
263-
* Holds if there exists a summary that applies to `this` that has provenance `provenance`.
315+
* Holds if there exists a manual summary that applies to this callable.
264316
*/
265-
predicate hasProvenance(string provenance) { none() }
317+
final predicate hasManualModel() {
318+
exists(Provenance p | p.isManual() and this.hasProvenance(p))
319+
}
320+
321+
/**
322+
* Holds if there exists a manual summary that applies to this callable.
323+
* Always apply manual models if they exist.
324+
*/
325+
final predicate applyManualModel() { this.hasManualModel() }
326+
327+
/**
328+
* Holds if there exists a summary that applies to this callable
329+
* that has provenance `provenance`.
330+
*/
331+
predicate hasProvenance(Provenance provenance) { provenance = "manual" }
266332
}
267333

268334
/** A callable where there is no flow via the callable. */
269335
class NeutralCallable extends SummarizedCallableBase {
270-
NeutralCallable() { neutralElement(this, _) }
336+
private Provenance provenance;
337+
338+
NeutralCallable() { neutralElement(this, provenance) }
271339

272340
/**
273341
* Holds if the neutral is auto generated.
274342
*/
275-
predicate isAutoGenerated() { neutralElement(this, ["generated", "ai-generated"]) }
343+
final predicate hasGeneratedModel() { provenance.isGenerated() }
276344

277345
/**
278-
* Holds if there exists a manual neutral that applies to `this`.
346+
* Holds if there exists a manual neutral that applies to this callable.
279347
*/
280-
final predicate isManual() { this.hasProvenance("manual") }
348+
final predicate hasManualModel() { provenance.isManual() }
281349

282350
/**
283-
* Holds if the neutral has provenance `provenance`.
351+
* Holds if the neutral has provenance `p`.
284352
*/
285-
predicate hasProvenance(string provenance) { neutralElement(this, provenance) }
353+
predicate hasProvenance(Provenance p) { p = provenance }
286354
}
287355
}
288356

@@ -1017,12 +1085,18 @@ module Private {
10171085
private predicate relevantSummaryElementGenerated(
10181086
AccessPath inSpec, AccessPath outSpec, string kind
10191087
) {
1020-
summaryElement(this, inSpec, outSpec, kind, ["generated", "ai-generated"]) and
1021-
not summaryElement(this, _, _, _, "manual")
1088+
exists(Provenance provenance |
1089+
provenance.isGenerated() and
1090+
summaryElement(this, inSpec, outSpec, kind, provenance)
1091+
) and
1092+
not this.applyManualModel()
10221093
}
10231094

10241095
private predicate relevantSummaryElement(AccessPath inSpec, AccessPath outSpec, string kind) {
1025-
summaryElement(this, inSpec, outSpec, kind, "manual")
1096+
exists(Provenance provenance |
1097+
provenance.isManual() and
1098+
summaryElement(this, inSpec, outSpec, kind, provenance)
1099+
)
10261100
or
10271101
this.relevantSummaryElementGenerated(inSpec, outSpec, kind)
10281102
}
@@ -1041,7 +1115,7 @@ module Private {
10411115
)
10421116
}
10431117

1044-
override predicate hasProvenance(string provenance) {
1118+
override predicate hasProvenance(Provenance provenance) {
10451119
summaryElement(this, _, _, _, provenance)
10461120
}
10471121
}
@@ -1052,6 +1126,10 @@ module Private {
10521126
not exists(interpretComponent(c))
10531127
}
10541128

1129+
/** Holds if `provenance` is not a valid provenance value. */
1130+
bindingset[provenance]
1131+
predicate invalidProvenance(string provenance) { not provenance instanceof Provenance }
1132+
10551133
/**
10561134
* Holds if token `part` of specification `spec` has an invalid index.
10571135
* E.g., `Argument[-1]`.
@@ -1219,11 +1297,11 @@ module Private {
12191297
}
12201298

12211299
private string renderProvenance(SummarizedCallable c) {
1222-
if c.isManual() then result = "manual" else c.hasProvenance(result)
1300+
if c.applyManualModel() then result = "manual" else c.hasProvenance(result)
12231301
}
12241302

12251303
private string renderProvenanceNeutral(NeutralCallable c) {
1226-
if c.isManual() then result = "manual" else c.hasProvenance(result)
1304+
if c.hasManualModel() then result = "manual" else c.hasProvenance(result)
12271305
}
12281306

12291307
/**

csharp/ql/lib/semmle/code/csharp/frameworks/EntityFramework.qll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,6 @@ module EntityFramework {
8686
abstract class EFSummarizedCallable extends SummarizedCallable {
8787
bindingset[this]
8888
EFSummarizedCallable() { any() }
89-
90-
override predicate hasProvenance(string provenance) { provenance = "manual" }
9189
}
9290

9391
private class DbSetAddOrUpdateRequiredSummaryComponentStack extends RequiredSummaryComponentStack {

csharp/ql/src/utils/modelconverter/ExtractNeutrals.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@ import semmle.code.csharp.dataflow.ExternalFlow
1010
from string package, string type, string name, string signature, string provenance
1111
where
1212
neutralModel(package, type, name, signature, provenance) and
13-
provenance != "generated"
13+
not provenance.matches("%generated")
1414
select package, type, name, signature, provenance order by package, type, name, signature

csharp/ql/src/utils/modelconverter/ExtractSinks.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,6 @@ from
1212
string input, string kind, string provenance
1313
where
1414
sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance) and
15-
provenance != "generated"
15+
not provenance.matches("%generated")
1616
select namespace, type, subtypes, name, signature, ext, input, kind, provenance order by
1717
namespace, type, name, signature, input, kind

csharp/ql/src/utils/modelconverter/ExtractSources.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,6 @@ from
1212
string output, string kind, string provenance
1313
where
1414
sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance) and
15-
provenance != "generated"
15+
not provenance.matches("%generated")
1616
select namespace, type, subtypes, name, signature, ext, output, kind, provenance order by
1717
namespace, type, name, signature, output, kind

0 commit comments

Comments
 (0)