Skip to content

Commit dca2273

Browse files
committed
Java: Add metric queries for counting sinks coming from models
Adds two queries for gathering metrics on the number of alerts (for a selection of queries) that arise from models with the `ai-generated` provenance.
1 parent 634c838 commit dca2273

File tree

4 files changed

+150
-0
lines changed

4 files changed

+150
-0
lines changed
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
private import java
2+
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
3+
private import semmle.code.java.dataflow.internal.DataFlow
4+
private import semmle.code.java.dataflow.TaintTracking
5+
private import semmle.code.java.security.RequestForgeryConfig
6+
private import semmle.code.java.security.CommandLineQuery
7+
private import semmle.code.java.security.SqlConcatenatedQuery
8+
private import semmle.code.java.security.SqlInjectionQuery
9+
private import semmle.code.java.security.UrlRedirectQuery
10+
private import semmle.code.java.security.TaintedPathQuery
11+
private import semmle.code.java.security.SqlInjectionQuery
12+
private import AutomodelJavaUtil
13+
14+
/** An expression that a sink model may apply to. */
private class PotentialSinkModelExpr extends Expr {
  /**
   * Holds if this expression is an argument, or the qualifier (index `-1`), of a call whose
   * callee is described by `package`, `type`, `subtypes`, `name` and `signature`, and `input`
   * is the string that `getArgumentForIndex` yields for the position this expression occupies.
   * These columns are what is needed to look up a corresponding sink model, if there is one.
   */
  pragma[nomagic]
  predicate hasSignature(
    string package, string type, boolean subtypes, string name, string signature, string input
  ) {
    exists(Call invocation, Callable target, RefType owner, int position |
      target = invocation.getCallee() and
      owner = target.getDeclaringType() and
      (
        // The qualifier is represented by the pseudo-index -1.
        position = -1 and this = invocation.getQualifier()
        or
        this = invocation.getArgument(position)
      ) and
      package = owner.getPackage().getName() and
      type = owner.getErasure().(RefType).nestedName() and
      name = target.getName() and
      signature = ExternalFlow::paramsString(target) and
      subtypes = considerSubtypes(target) and
      input = getArgumentForIndex(position)
    )
  }
}
40+
41+
/** Gets the capitalised spelling (`"True"` or `"False"`) of the boolean `b`. */
private string pyBool(boolean b) {
  result = "False" and b = false
  or
  result = "True" and b = true
}
46+
47+
/**
 * Gets the textual form, as it would be written in a Models-as-Data file, of a sink model with
 * provenance `ai-generated` that matches the signature of the expression `e`.
 *
 * The result is a comma-separated list of the model columns; string columns are wrapped in
 * double quotes and the `subtypes` column is rendered as `True` or `False`.
 */
string getSinkModelRepr(PotentialSinkModelExpr e) {
  exists(
    string pkg, string typeName, boolean sub, string member, string sig, string ext,
    string input, string kind, string provenance
  |
    // Only models produced by the AI-based generator are of interest here.
    provenance = "ai-generated" and
    ExternalFlow::sinkModel(pkg, typeName, sub, member, sig, ext, input, kind, provenance) and
    e.hasSignature(pkg, typeName, sub, member, sig, input) and
    result =
      "\"" + pkg + "\", \"" + typeName + "\", " + pyBool(sub) + ", \"" + member + "\", \"" + sig +
        "\", \"" + ext + "\", \"" + input + "\", \"" + kind + "\", \"" + provenance + "\""
  )
}
65+
66+
/**
 * Gets the sink model representation for `e`, prefixed with a newline and the label
 * `sinkModel: ` so that it can be appended to an alert message.
 */
string getSinkModelQueryRepr(PotentialSinkModelExpr e) {
  exists(string repr | repr = getSinkModelRepr(e) | result = "\nsinkModel: " + repr)
}
73+
74+
/**
 * A parameterised module that, given a data flow configuration, instantiates global taint
 * tracking for it and exposes a predicate that tallies how many of the resulting flow paths
 * end at an expression covered by an AI-generated sink model.
 */
private module SinkTallier<DataFlow::ConfigSig Config> {
  module ConfigFlow = TaintTracking::Global<Config>;

  /**
   * Holds if `s` is the representation of a sink model at some path node, and `c` is the
   * number of path nodes that end a flow path and whose expression has the sink model `s`.
   * Does not hold when that number is zero.
   */
  predicate getSinkModelCount(int c, string s) {
    // Bind `s` outside the aggregate; the aggregate then counts the matching path sinks.
    exists(ConfigFlow::PathNode candidate | s = getSinkModelRepr(candidate.getNode().asExpr())) and
    c =
      strictcount(ConfigFlow::PathNode reached |
        s = getSinkModelRepr(reached.getNode().asExpr()) and
        ConfigFlow::flowPath(_, reached)
      )
  }
}
90+
91+
/**
 * Holds if the query with ID `queryName` produces `alertCount` alerts whose sink is covered by
 * the AI-generated sink model `sinkModel`. Only (query, model) pairs with at least one such
 * alert are included.
 */
predicate sinkModelTallyPerQuery(string queryName, int alertCount, string sinkModel) {
  queryName = "java/command-line-injection" and
  // A model counts as soon as EITHER the remote or the local configuration reaches it. Joining
  // the two tallies directly would silently drop models that only one of them reaches, because
  // `getSinkModelCount` has no row for a count of zero.
  (
    SinkTallier<RemoteUserInputToArgumentToExecFlowConfig>::getSinkModelCount(_, sinkModel)
    or
    SinkTallier<LocalUserInputToArgumentToExecFlowConfig>::getSinkModelCount(_, sinkModel)
  ) and
  alertCount =
    sum(string flavour, int c |
      // `flavour` keeps the two summands apart even when both counts are equal.
      flavour = "remote" and
      SinkTallier<RemoteUserInputToArgumentToExecFlowConfig>::getSinkModelCount(c, sinkModel)
      or
      flavour = "local" and
      SinkTallier<LocalUserInputToArgumentToExecFlowConfig>::getSinkModelCount(c, sinkModel)
    |
      c
    )
  or
  queryName = "java/concatenated-sql-query" and
  SinkTallier<UncontrolledStringBuilderSourceFlowConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  // `RequestForgeryConfig` is tallied exactly once, under `java/ssrf`. Listing the same
  // configuration under a second query ID would count its alerts twice in `sinkModelTally`.
  queryName = "java/ssrf" and
  SinkTallier<RequestForgeryConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/path-injection" and
  SinkTallier<TaintedPathConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/unvalidated-url-redirection" and
  SinkTallier<UrlRedirectConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/sql-injection" and
  SinkTallier<QueryInjectionFlowConfig>::getSinkModelCount(alertCount, sinkModel)
}

/**
 * Holds if `alertCount` is the total number of alerts, summed over all queries covered by
 * `sinkModelTallyPerQuery`, whose sink is covered by the AI-generated sink model `sinkModel`.
 */
predicate sinkModelTally(int alertCount, string sinkModel) {
  sinkModelTallyPerQuery(_, _, sinkModel) and
  // The query name must be an aggregation variable: aggregates range over distinct tuples, so
  // summing over `c` alone would merge two queries that happen to have the same count.
  alertCount =
    sum(string queryName, int c | sinkModelTallyPerQuery(queryName, c, sinkModel) | c)
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/**
2+
* @name Number of alerts per sink model
3+
* @description Counts the number of alerts using `ai-generated` sink models.
4+
* @kind table
5+
* @id java/ml/metrics-count-alerts-per-sink-model
6+
* @tags internal automodel metrics
7+
*/
8+
9+
private import java
10+
private import AutomodelAlertSinkUtil
11+
12+
// One row per `ai-generated` sink model, with the number of alerts that use it.
from string sinkModel, int alertCount
where sinkModelTally(alertCount, sinkModel)
select alertCount, sinkModel
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/**
2+
* @name Number of alerts per sink model and query
3+
* @description Counts the number of alerts per query using `ai-generated` sink models.
4+
* @kind table
5+
* @id java/ml/metrics-count-alerts-per-sink-model-and-query
6+
* @tags internal automodel metrics
7+
*/
8+
9+
private import java
10+
private import AutomodelAlertSinkUtil
11+
12+
// One row per (query, `ai-generated` sink model) pair. The query ID is part of the output:
// without it, rows belonging to different queries cannot be told apart.
from string queryId, int alertCount, string sinkModel
where sinkModelTallyPerQuery(queryId, alertCount, sinkModel)
select queryId, alertCount, sinkModel

java/ql/src/Telemetry/AutomodelJavaUtil.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ string getArgumentForIndex(int index) {
5656
* It would technically be ok to always use the value 'true', but this would
5757
* break convention.
5858
*/
59+
pragma[nomagic]
5960
boolean considerSubtypes(Callable callable) {
6061
if
6162
callable.isStatic() or

0 commit comments

Comments
 (0)