Skip to content

Commit 20254c3

Browse files
author
Stephan Brandauer
authored
Merge pull request github#13886 from github/kaeluka/java-automodel-variadic-args
Java: automodel application mode: use endpoint class like in framework mode
2 parents b8372c2 + 1a95a34 commit 20254c3

8 files changed

+162
-99
lines changed

java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll

Lines changed: 114 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,97 @@ import AutomodelEndpointTypes as AutomodelEndpointTypes
2121

2222
newtype JavaRelatedLocationType = CallContext()
2323

24+
newtype TApplicationModeEndpoint =
25+
TExplicitArgument(Call call, DataFlow::Node arg) {
26+
exists(Argument argExpr |
27+
arg.asExpr() = argExpr and call = argExpr.getCall() and not argExpr.isVararg()
28+
)
29+
} or
30+
TInstanceArgument(Call call, DataFlow::Node arg) { arg = DataFlow::getInstanceArgument(call) } or
31+
TImplicitVarargsArray(Call call, DataFlow::Node arg, int idx) {
32+
exists(Argument argExpr |
33+
arg.asExpr() = argExpr and
34+
call.getArgument(idx) = argExpr and
35+
argExpr.isVararg() and
36+
not exists(int i | i < idx and call.getArgument(i).(Argument).isVararg())
37+
)
38+
}
39+
40+
/**
41+
* An endpoint is a node that is a candidate for modeling.
42+
*/
43+
abstract private class ApplicationModeEndpoint extends TApplicationModeEndpoint {
44+
abstract predicate isArgOf(Call c, int idx);
45+
46+
Call getCall() { this.isArgOf(result, _) }
47+
48+
int getArgIndex() { this.isArgOf(_, result) }
49+
50+
abstract Top asTop();
51+
52+
abstract DataFlow::Node asNode();
53+
54+
abstract string toString();
55+
}
56+
2457
/**
2558
* A class representing nodes that are arguments to calls.
2659
*/
27-
private class ArgumentNode extends DataFlow::Node {
28-
Call c;
60+
class ExplicitArgument extends ApplicationModeEndpoint, TExplicitArgument {
61+
Call call;
62+
DataFlow::Node arg;
2963

30-
ArgumentNode() {
31-
exists(Argument arg | this.asExpr() = arg and not arg.isVararg() and c = arg.getCall())
32-
or
33-
this.(DataFlow::ImplicitVarargsArray).getCall() = c
34-
or
35-
this = DataFlow::getInstanceArgument(c)
64+
ExplicitArgument() { this = TExplicitArgument(call, arg) }
65+
66+
override predicate isArgOf(Call c, int idx) { c = call and this.asTop() = c.getArgument(idx) }
67+
68+
override Top asTop() { result = arg.asExpr() }
69+
70+
override DataFlow::Node asNode() { result = arg }
71+
72+
override string toString() { result = arg.toString() }
73+
}
74+
75+
class InstanceArgument extends ApplicationModeEndpoint, TInstanceArgument {
76+
Call call;
77+
DataFlow::Node arg;
78+
79+
InstanceArgument() { this = TInstanceArgument(call, arg) }
80+
81+
override predicate isArgOf(Call c, int idx) {
82+
c = call and this.asTop() = c.getQualifier() and idx = -1
3683
}
3784

38-
Call getCall() { result = c }
85+
override Top asTop() { if exists(arg.asExpr()) then result = arg.asExpr() else result = call }
86+
87+
override DataFlow::Node asNode() { result = arg }
88+
89+
override string toString() { result = arg.toString() }
90+
}
91+
92+
/**
93+
* An endpoint that represents an implicit varargs array.
94+
* We choose to represent the varargs array as a single endpoint, rather than as multiple endpoints.
95+
*
96+
* This avoids the problem of having to deal with redundant endpoints downstream.
97+
*
98+
* In order to be able to distinguish between varargs endpoints and regular endpoints, we export the `isVarargsArray`
99+
* meta data field in the extraction queries.
100+
*/
101+
class ImplicitVarargsArray extends ApplicationModeEndpoint, TImplicitVarargsArray {
102+
Call call;
103+
DataFlow::Node vararg;
104+
int idx;
105+
106+
ImplicitVarargsArray() { this = TImplicitVarargsArray(call, vararg, idx) }
107+
108+
override predicate isArgOf(Call c, int i) { c = call and i = idx }
109+
110+
override Top asTop() { result = this.getCall() }
111+
112+
override DataFlow::Node asNode() { result = vararg }
113+
114+
override string toString() { result = vararg.toString() }
39115
}
40116

41117
/**
@@ -47,7 +123,7 @@ private class ArgumentNode extends DataFlow::Node {
47123
*/
48124
module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig {
49125
// for documentation of the implementations here, see the QLDoc in the CandidateSig signature module.
50-
class Endpoint = ArgumentNode;
126+
class Endpoint = ApplicationModeEndpoint;
51127

52128
class EndpointType = AutomodelEndpointTypes::EndpointType;
53129

@@ -61,18 +137,18 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
61137
predicate isSanitizer(Endpoint e, EndpointType t) {
62138
exists(t) and
63139
(
64-
e.getType() instanceof BoxedType
140+
e.asNode().getType() instanceof BoxedType
65141
or
66-
e.getType() instanceof PrimitiveType
142+
e.asNode().getType() instanceof PrimitiveType
67143
or
68-
e.getType() instanceof NumberType
144+
e.asNode().getType() instanceof NumberType
69145
)
70146
or
71147
t instanceof AutomodelEndpointTypes::PathInjectionSinkType and
72-
e instanceof PathSanitizer::PathInjectionSanitizer
148+
e.asNode() instanceof PathSanitizer::PathInjectionSanitizer
73149
}
74150

75-
RelatedLocation asLocation(Endpoint e) { result = e.asExpr() }
151+
RelatedLocation asLocation(Endpoint e) { result = e.asTop() }
76152

77153
predicate isKnownKind = AutomodelJavaUtil::isKnownKind/2;
78154

@@ -98,16 +174,7 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
98174
ApplicationModeGetCallable::getCallable(e).hasQualifiedName(package, type, name) and
99175
signature = ExternalFlow::paramsString(ApplicationModeGetCallable::getCallable(e)) and
100176
ext = "" and
101-
(
102-
exists(Call c, int argIdx |
103-
e.asExpr() = c.getArgument(argIdx) and
104-
input = AutomodelJavaUtil::getArgumentForIndex(argIdx)
105-
)
106-
or
107-
exists(Call c |
108-
e.asExpr() = c.getQualifier() and input = AutomodelJavaUtil::getArgumentForIndex(-1)
109-
)
110-
)
177+
input = AutomodelJavaUtil::getArgumentForIndex(e.getArgIndex())
111178
}
112179

113180
/**
@@ -118,7 +185,7 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
118185
*/
119186
RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) {
120187
type = CallContext() and
121-
result = any(Call c | e.asExpr() = [c.getAnArgument(), c.getQualifier()])
188+
result = e.getCall()
122189
}
123190
}
124191

@@ -132,20 +199,15 @@ private module ApplicationModeGetCallable implements AutomodelSharedGetCallable:
132199
/**
133200
* Returns the API callable being modeled.
134201
*/
135-
Callable getCallable(Endpoint e) {
136-
exists(Call c |
137-
e.asExpr() = [c.getAnArgument(), c.getQualifier()] and
138-
result = c.getCallee()
139-
)
140-
}
202+
Callable getCallable(Endpoint e) { result = e.getCall().getCallee() }
141203
}
142204

143205
/**
144206
* Contains endpoints that are defined in QL code rather than as a MaD model. Ideally this predicate
145207
* should be empty.
146208
*/
147209
private predicate isCustomSink(Endpoint e, string kind) {
148-
e instanceof QueryInjectionSink and kind = "sql"
210+
e.asNode() instanceof QueryInjectionSink and kind = "sql"
149211
}
150212

151213
module CharacteristicsImpl =
@@ -167,23 +229,21 @@ class ApplicationModeMetadataExtractor extends string {
167229

168230
predicate hasMetadata(
169231
Endpoint e, string package, string type, string subtypes, string name, string signature,
170-
string input
232+
string input, string isVarargsArray
171233
) {
172-
exists(Call call, Callable callable, int argIdx |
173-
call.getCallee() = callable and
174-
(
175-
e.asExpr() = call.getArgument(argIdx)
176-
or
177-
e.asExpr() = call.getQualifier() and argIdx = -1
178-
) and
179-
input = AutomodelJavaUtil::getArgumentForIndex(argIdx) and
234+
exists(Callable callable |
235+
e.getCall().getCallee() = callable and
236+
input = AutomodelJavaUtil::getArgumentForIndex(e.getArgIndex()) and
180237
package = callable.getDeclaringType().getPackage().getName() and
181238
// we're using the erased types because the MaD convention is to not specify type parameters.
182239
// Whether something is or isn't a sink doesn't usually depend on the type parameters.
183240
type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
184241
subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
185242
name = callable.getName() and
186-
signature = ExternalFlow::paramsString(callable)
243+
signature = ExternalFlow::paramsString(callable) and
244+
if e instanceof ImplicitVarargsArray
245+
then isVarargsArray = "true"
246+
else isVarargsArray = "false"
187247
)
188248
}
189249
}
@@ -253,28 +313,10 @@ private class IsMaDTaintStepCharacteristic extends CharacteristicsImpl::NotASink
253313
IsMaDTaintStepCharacteristic() { this = "taint step" }
254314

255315
override predicate appliesToEndpoint(Endpoint e) {
256-
FlowSummaryImpl::Private::Steps::summaryThroughStepValue(e, _, _) or
257-
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(e, _, _) or
258-
FlowSummaryImpl::Private::Steps::summaryGetterStep(e, _, _, _) or
259-
FlowSummaryImpl::Private::Steps::summarySetterStep(e, _, _, _)
260-
}
261-
}
262-
263-
/**
264-
* A negative characteristic that filters out qualifiers that are classes (i.e. static calls). These
265-
* are unlikely to have any non-trivial flow going into them.
266-
*
267-
* Technically, an accessed type _could_ come from outside of the source code, but there's not
268-
* much likelihood of that being user-controlled.
269-
*/
270-
private class ClassQualifierCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
271-
ClassQualifierCharacteristic() { this = "class qualifier" }
272-
273-
override predicate appliesToEndpoint(Endpoint e) {
274-
exists(Call c |
275-
e.asExpr() = c.getQualifier() and
276-
c.getQualifier() instanceof TypeAccess
277-
)
316+
FlowSummaryImpl::Private::Steps::summaryThroughStepValue(e.asNode(), _, _) or
317+
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(e.asNode(), _, _) or
318+
FlowSummaryImpl::Private::Steps::summaryGetterStep(e.asNode(), _, _, _) or
319+
FlowSummaryImpl::Private::Steps::summarySetterStep(e.asNode(), _, _, _)
278320
}
279321
}
280322

@@ -351,7 +393,7 @@ private class OtherArgumentToModeledMethodCharacteristic extends Characteristics
351393
private class FunctionValueCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic {
352394
FunctionValueCharacteristic() { this = "function value" }
353395

354-
override predicate appliesToEndpoint(Endpoint e) { e.asExpr() instanceof FunctionalExpr }
396+
override predicate appliesToEndpoint(Endpoint e) { e.asNode().asExpr() instanceof FunctionalExpr }
355397
}
356398

357399
/**
@@ -371,12 +413,12 @@ private class CannotBeTaintedCharacteristic extends CharacteristicsImpl::LikelyN
371413
* Holds if the node `n` is known as the predecessor in a modeled flow step.
372414
*/
373415
private predicate isKnownOutNodeForStep(Endpoint e) {
374-
e.asExpr() instanceof Call or // we just assume flow in that case
375-
TaintTracking::localTaintStep(_, e) or
376-
FlowSummaryImpl::Private::Steps::summaryThroughStepValue(_, e, _) or
377-
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(_, e, _) or
378-
FlowSummaryImpl::Private::Steps::summaryGetterStep(_, _, e, _) or
379-
FlowSummaryImpl::Private::Steps::summarySetterStep(_, _, e, _)
416+
e.asNode().asExpr() instanceof Call or // we just assume flow in that case
417+
TaintTracking::localTaintStep(_, e.asNode()) or
418+
FlowSummaryImpl::Private::Steps::summaryThroughStepValue(_, e.asNode(), _) or
419+
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(_, e.asNode(), _) or
420+
FlowSummaryImpl::Private::Steps::summaryGetterStep(_, _, e.asNode(), _) or
421+
FlowSummaryImpl::Private::Steps::summarySetterStep(_, _, e.asNode(), _)
380422
}
381423
}
382424

java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,18 @@ private import AutomodelJavaUtil
2525
bindingset[limit]
2626
private Endpoint getSampleForSignature(
2727
int limit, string package, string type, string subtypes, string name, string signature,
28-
string input
28+
string input, string isVarargs
2929
) {
3030
exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta |
3131
num_endpoints =
32-
count(Endpoint e | meta.hasMetadata(e, package, type, subtypes, name, signature, input))
32+
count(Endpoint e |
33+
meta.hasMetadata(e, package, type, subtypes, name, signature, input, isVarargs)
34+
)
3335
|
3436
result =
3537
rank[n](Endpoint e, Location loc |
36-
loc = e.getLocation() and
37-
meta.hasMetadata(e, package, type, subtypes, name, signature, input)
38+
loc = e.asTop().getLocation() and
39+
meta.hasMetadata(e, package, type, subtypes, name, signature, input, isVarargs)
3840
|
3941
e
4042
order by
@@ -53,19 +55,20 @@ private Endpoint getSampleForSignature(
5355
from
5456
Endpoint endpoint, string message, ApplicationModeMetadataExtractor meta, DollarAtString package,
5557
DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
56-
DollarAtString input
58+
DollarAtString input, DollarAtString isVarargsArray
5759
where
5860
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
5961
u.appliesToEndpoint(endpoint)
6062
) and
61-
endpoint = getSampleForSignature(9, package, type, subtypes, name, signature, input) and
63+
endpoint =
64+
getSampleForSignature(9, package, type, subtypes, name, signature, input, isVarargsArray) and
6265
// If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
6366
// don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
6467
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
6568
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
6669
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
6770
not CharacteristicsImpl::isSink(endpoint, _, _) and
68-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
71+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and
6972
includeAutomodelCandidate(package, type, name, signature) and
7073
// The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be
7174
// a non-sink, and we surface only endpoints that have at least one such sink type.
@@ -76,11 +79,13 @@ where
7679
|
7780
sinkType, ", "
7881
)
79-
select endpoint, message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
82+
select endpoint.asNode(),
83+
message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@.", //
8084
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
8185
package, "package", //
8286
type, "type", //
8387
subtypes, "subtypes", //
8488
name, "name", // method name
8589
signature, "signature", //
86-
input, "input" //
90+
input, "input", //
91+
isVarargsArray, "isVarargsArray"

java/ql/src/Telemetry/AutomodelApplicationModeExtractNegativeExamples.ql

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) {
2424
exists(int n, int num_endpoints | num_endpoints = count(Endpoint e | c.appliesToEndpoint(e)) |
2525
result =
2626
rank[n](Endpoint e, Location loc |
27-
loc = e.getLocation() and c.appliesToEndpoint(e)
27+
loc = e.asTop().getLocation() and c.appliesToEndpoint(e)
2828
|
2929
e
3030
order by
@@ -43,15 +43,16 @@ Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) {
4343
from
4444
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
4545
ApplicationModeMetadataExtractor meta, DollarAtString package, DollarAtString type,
46-
DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input
46+
DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input,
47+
DollarAtString isVarargsArray
4748
where
4849
endpoint = getSampleForCharacteristic(characteristic, 100) and
4950
confidence >= SharedCharacteristics::highConfidence() and
5051
characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and
5152
// Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
5253
// certain about in the prompt.
5354
not erroneousEndpoints(endpoint, _, _, _, _, false) and
54-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and
55+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and
5556
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
5657
// treated by the actual query as a sanitizer, since the final logic is something like
5758
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because
@@ -63,11 +64,13 @@ where
6364
characteristic2.hasImplications(positiveType, true, confidence2)
6465
) and
6566
message = characteristic
66-
select endpoint, message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@.", //
67+
select endpoint.asNode(),
68+
message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@.", //
6769
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
6870
package, "package", //
6971
type, "type", //
7072
subtypes, "subtypes", //
7173
name, "name", //
7274
signature, "signature", //
73-
input, "input" //
75+
input, "input", //
76+
isVarargsArray, "isVarargsArray" //

0 commit comments

Comments
 (0)