Skip to content

Commit 7705f0e

Browse files
author
Stephan Brandauer
authored
Merge pull request github#14162 from github/kaeluka/application-mode-source-candidates
Java: Automodel App Mode Extraction: Source Candidates
2 parents 01a74db + bbedd72 commit 7705f0e

16 files changed

+323
-77
lines changed

java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll

Lines changed: 163 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -35,22 +35,54 @@ newtype TApplicationModeEndpoint =
3535
argExpr.isVararg() and
3636
not exists(int i | i < idx and call.getArgument(i).(Argument).isVararg())
3737
)
38+
} or
39+
TMethodReturnValue(Call call) { not call instanceof ConstructorCall } or
40+
TOverriddenParameter(Parameter p, Method overriddenMethod) {
41+
not p.getCallable().callsConstructor(_) and
42+
p.getCallable().(Method).overrides(overriddenMethod)
3843
}
3944

4045
/**
4146
* An endpoint is a node that is a candidate for modeling.
4247
*/
4348
abstract private class ApplicationModeEndpoint extends TApplicationModeEndpoint {
44-
abstract predicate isArgOf(Call c, int idx);
49+
/**
50+
* Gets the callable to be modeled that this endpoint represents.
51+
*/
52+
abstract Callable getCallable();
53+
54+
abstract Call getCall();
4555

46-
Call getCall() { this.isArgOf(result, _) }
56+
/**
57+
* Gets the input (if any) for this endpoint, eg.: `Argument[0]`.
58+
*
59+
* For endpoints that are source candidates, this will be `none()`.
60+
*/
61+
abstract string getMaDInput();
4762

48-
int getArgIndex() { this.isArgOf(_, result) }
63+
/**
64+
* Gets the output (if any) for this endpoint, eg.: `ReturnValue`.
65+
*
66+
* For endpoints that are sink candidates, this will be `none()`.
67+
*/
68+
abstract string getMaDOutput();
4969

5070
abstract Top asTop();
5171

72+
/**
73+
* Converts the endpoint to a node that can be used in a data flow graph.
74+
*/
5275
abstract DataFlow::Node asNode();
5376

77+
string getExtensibleType() {
78+
if not exists(this.getMaDInput()) and exists(this.getMaDOutput())
79+
then result = "sourceModel"
80+
else
81+
if exists(this.getMaDInput()) and not exists(this.getMaDOutput())
82+
then result = "sinkModel"
83+
else none() // if both exist, it would be a summaryModel (not yet supported)
84+
}
85+
5486
abstract string toString();
5587
}
5688

@@ -63,7 +95,15 @@ class ExplicitArgument extends ApplicationModeEndpoint, TExplicitArgument {
6395

6496
ExplicitArgument() { this = TExplicitArgument(call, arg) }
6597

66-
override predicate isArgOf(Call c, int idx) { c = call and this.asTop() = c.getArgument(idx) }
98+
override Callable getCallable() { result = call.getCallee() }
99+
100+
override Call getCall() { result = call }
101+
102+
private int getArgIndex() { this.asTop() = call.getArgument(result) }
103+
104+
override string getMaDInput() { result = "Argument[" + this.getArgIndex() + "]" }
105+
106+
override string getMaDOutput() { none() }
67107

68108
override Top asTop() { result = arg.asExpr() }
69109

@@ -78,9 +118,13 @@ class InstanceArgument extends ApplicationModeEndpoint, TInstanceArgument {
78118

79119
InstanceArgument() { this = TInstanceArgument(call, arg) }
80120

81-
override predicate isArgOf(Call c, int idx) {
82-
c = call and this.asTop() = c.getQualifier() and idx = -1
83-
}
121+
override Callable getCallable() { result = call.getCallee() }
122+
123+
override Call getCall() { result = call }
124+
125+
override string getMaDInput() { result = "Argument[this]" }
126+
127+
override string getMaDOutput() { none() }
84128

85129
override Top asTop() { if exists(arg.asExpr()) then result = arg.asExpr() else result = call }
86130

@@ -105,15 +149,78 @@ class ImplicitVarargsArray extends ApplicationModeEndpoint, TImplicitVarargsArra
105149

106150
ImplicitVarargsArray() { this = TImplicitVarargsArray(call, vararg, idx) }
107151

108-
override predicate isArgOf(Call c, int i) { c = call and i = idx }
152+
override Callable getCallable() { result = call.getCallee() }
153+
154+
override Call getCall() { result = call }
155+
156+
override string getMaDInput() { result = "Argument[" + idx + "]" }
109157

110-
override Top asTop() { result = this.getCall() }
158+
override string getMaDOutput() { none() }
159+
160+
override Top asTop() { result = call }
111161

112162
override DataFlow::Node asNode() { result = vararg }
113163

114164
override string toString() { result = vararg.toString() }
115165
}
116166

167+
/**
168+
* An endpoint that represents a method call. The `ReturnValue` of a method call
169+
* may be a source.
170+
*/
171+
class MethodReturnValue extends ApplicationModeEndpoint, TMethodReturnValue {
172+
Call call;
173+
174+
MethodReturnValue() { this = TMethodReturnValue(call) }
175+
176+
override Callable getCallable() { result = call.getCallee() }
177+
178+
override Call getCall() { result = call }
179+
180+
override string getMaDInput() { none() }
181+
182+
override string getMaDOutput() { result = "ReturnValue" }
183+
184+
override Top asTop() { result = call }
185+
186+
override DataFlow::Node asNode() { result.asExpr() = call }
187+
188+
override string toString() { result = call.toString() }
189+
}
190+
191+
/**
192+
* An endpoint that represents a parameter of an overridden method that may be
193+
* a source.
194+
*/
195+
class OverriddenParameter extends ApplicationModeEndpoint, TOverriddenParameter {
196+
Parameter p;
197+
Method overriddenMethod;
198+
199+
OverriddenParameter() { this = TOverriddenParameter(p, overriddenMethod) }
200+
201+
override Callable getCallable() {
202+
// NB: we're returning the overridden callable here. This means that the
203+
// candidate model will be about the overridden method, not the overriding
204+
// method. This is a more general model, that also applies to other
205+
// subclasses of the overridden class.
206+
result = overriddenMethod
207+
}
208+
209+
override Call getCall() { none() }
210+
211+
private int getArgIndex() { p.getCallable().getParameter(result) = p }
212+
213+
override string getMaDInput() { none() }
214+
215+
override string getMaDOutput() { result = "Parameter[" + this.getArgIndex() + "]" }
216+
217+
override Top asTop() { result = p }
218+
219+
override DataFlow::Node asNode() { result.(DataFlow::ParameterNode).asParameter() = p }
220+
221+
override string toString() { result = p.toString() }
222+
}
223+
117224
/**
118225
* A candidates implementation.
119226
*
@@ -161,20 +268,39 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
161268
isCustomSink(e, kind) and provenance = "custom-sink"
162269
}
163270

271+
predicate isSource(Endpoint e, string kind, string provenance) {
272+
exists(string package, string type, string name, string signature, string ext, string output |
273+
sourceSpec(e, package, type, name, signature, ext, output) and
274+
ExternalFlow::sourceModel(package, type, _, name, [signature, ""], ext, output, kind,
275+
provenance)
276+
)
277+
}
278+
164279
predicate isNeutral(Endpoint e) {
165280
exists(string package, string type, string name, string signature |
166281
sinkSpec(e, package, type, name, signature, _, _) and
167282
ExternalFlow::neutralModel(package, type, name, [signature, ""], "sink", _)
168283
)
169284
}
170285

286+
// XXX how to extend to support sources?
171287
additional predicate sinkSpec(
172288
Endpoint e, string package, string type, string name, string signature, string ext, string input
173289
) {
174-
ApplicationModeGetCallable::getCallable(e).hasQualifiedName(package, type, name) and
175-
signature = ExternalFlow::paramsString(ApplicationModeGetCallable::getCallable(e)) and
290+
e.getCallable().hasQualifiedName(package, type, name) and
291+
signature = ExternalFlow::paramsString(e.getCallable()) and
292+
ext = "" and
293+
input = e.getMaDInput()
294+
}
295+
296+
additional predicate sourceSpec(
297+
Endpoint e, string package, string type, string name, string signature, string ext,
298+
string output
299+
) {
300+
e.getCallable().hasQualifiedName(package, type, name) and
301+
signature = ExternalFlow::paramsString(e.getCallable()) and
176302
ext = "" and
177-
input = AutomodelJavaUtil::getArgumentForIndex(e.getArgIndex())
303+
output = e.getMaDOutput()
178304
}
179305

180306
/**
@@ -229,11 +355,12 @@ class ApplicationModeMetadataExtractor extends string {
229355

230356
predicate hasMetadata(
231357
Endpoint e, string package, string type, string subtypes, string name, string signature,
232-
string input, string isVarargsArray
358+
string input, string output, string isVarargsArray
233359
) {
234360
exists(Callable callable |
235-
e.getCall().getCallee() = callable and
236-
input = AutomodelJavaUtil::getArgumentForIndex(e.getArgIndex()) and
361+
e.getCallable() = callable and
362+
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
363+
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
237364
package = callable.getDeclaringType().getPackage().getName() and
238365
// we're using the erased types because the MaD convention is to not specify type parameters.
239366
// Whether something is or isn't a sink doesn't usually depend on the type parameters.
@@ -266,8 +393,8 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASin
266393

267394
override predicate appliesToEndpoint(Endpoint e) {
268395
not ApplicationCandidatesImpl::isSink(e, _, _) and
269-
ApplicationModeGetCallable::getCallable(e).getName().matches("is%") and
270-
ApplicationModeGetCallable::getCallable(e).getReturnType() instanceof BooleanType
396+
e.getCallable().getName().matches("is%") and
397+
e.getCallable().getReturnType() instanceof BooleanType
271398
}
272399
}
273400

@@ -313,9 +440,13 @@ private class IsMaDTaintStepCharacteristic extends CharacteristicsImpl::NotASink
313440
IsMaDTaintStepCharacteristic() { this = "taint step" }
314441

315442
override predicate appliesToEndpoint(Endpoint e) {
316-
FlowSummaryImpl::Private::Steps::summaryThroughStepValue(e.asNode(), _, _) or
317-
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(e.asNode(), _, _) or
318-
FlowSummaryImpl::Private::Steps::summaryGetterStep(e.asNode(), _, _, _) or
443+
e.getExtensibleType() = "sinkModel" and
444+
FlowSummaryImpl::Private::Steps::summaryThroughStepValue(e.asNode(), _, _)
445+
or
446+
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(e.asNode(), _, _)
447+
or
448+
FlowSummaryImpl::Private::Steps::summaryGetterStep(e.asNode(), _, _, _)
449+
or
319450
FlowSummaryImpl::Private::Steps::summarySetterStep(e.asNode(), _, _, _)
320451
}
321452
}
@@ -326,8 +457,8 @@ private class IsMaDTaintStepCharacteristic extends CharacteristicsImpl::NotASink
326457
* The reason is that we would expect data/taint flow into the method implementation to uncover
327458
* any sinks that are present there.
328459
*/
329-
private class ArgumentToLocalCall extends CharacteristicsImpl::UninterestingToModelCharacteristic {
330-
ArgumentToLocalCall() { this = "argument to local call" }
460+
private class LocalCall extends CharacteristicsImpl::UninterestingToModelCharacteristic {
461+
LocalCall() { this = "local call" }
331462

332463
override predicate appliesToEndpoint(Endpoint e) {
333464
ApplicationModeGetCallable::getCallable(e).fromSource()
@@ -354,6 +485,7 @@ private class NonPublicMethodCharacteristic extends CharacteristicsImpl::Uninter
354485
NonPublicMethodCharacteristic() { this = "non-public method" }
355486

356487
override predicate appliesToEndpoint(Endpoint e) {
488+
e.getExtensibleType() = "sinkModel" and
357489
not ApplicationModeGetCallable::getCallable(e).isPublic()
358490
}
359491
}
@@ -376,6 +508,7 @@ private class OtherArgumentToModeledMethodCharacteristic extends Characteristics
376508
}
377509

378510
override predicate appliesToEndpoint(Endpoint e) {
511+
e.getExtensibleType() = "sinkModel" and
379512
not ApplicationCandidatesImpl::isSink(e, _, _) and
380513
exists(Endpoint otherSink |
381514
ApplicationCandidatesImpl::isSink(otherSink, _, "manual") and
@@ -393,7 +526,10 @@ private class OtherArgumentToModeledMethodCharacteristic extends Characteristics
393526
private class FunctionValueCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic {
394527
FunctionValueCharacteristic() { this = "function value" }
395528

396-
override predicate appliesToEndpoint(Endpoint e) { e.asNode().asExpr() instanceof FunctionalExpr }
529+
override predicate appliesToEndpoint(Endpoint e) {
530+
e.getExtensibleType() = "sinkModel" and
531+
e.asNode().asExpr() instanceof FunctionalExpr
532+
}
397533
}
398534

399535
/**
@@ -407,7 +543,10 @@ private class CannotBeTaintedCharacteristic extends CharacteristicsImpl::LikelyN
407543
{
408544
CannotBeTaintedCharacteristic() { this = "cannot be tainted" }
409545

410-
override predicate appliesToEndpoint(Endpoint e) { not this.isKnownOutNodeForStep(e) }
546+
override predicate appliesToEndpoint(Endpoint e) {
547+
e.getExtensibleType() = "sinkModel" and
548+
not this.isKnownOutNodeForStep(e)
549+
}
411550

412551
/**
413552
* Holds if the node `n` is known as the predecessor in a modeled flow step.

java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,20 @@ private import AutomodelJavaUtil
2525
bindingset[limit]
2626
private Endpoint getSampleForSignature(
2727
int limit, string package, string type, string subtypes, string name, string signature,
28-
string input, string isVarargs
28+
string input, string output, string isVarargs, string extensibleType
2929
) {
3030
exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta |
3131
num_endpoints =
3232
count(Endpoint e |
33-
meta.hasMetadata(e, package, type, subtypes, name, signature, input, isVarargs)
33+
e.getExtensibleType() = extensibleType and
34+
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
3435
)
3536
|
3637
result =
3738
rank[n](Endpoint e, Location loc |
3839
loc = e.asTop().getLocation() and
39-
meta.hasMetadata(e, package, type, subtypes, name, signature, input, isVarargs)
40+
e.getExtensibleType() = extensibleType and
41+
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
4042
|
4143
e
4244
order by
@@ -55,43 +57,47 @@ private Endpoint getSampleForSignature(
5557
from
5658
Endpoint endpoint, string message, ApplicationModeMetadataExtractor meta, DollarAtString package,
5759
DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
58-
DollarAtString input, DollarAtString isVarargsArray, DollarAtString alreadyAiModeled
60+
DollarAtString input, DollarAtString output, DollarAtString isVarargsArray,
61+
DollarAtString alreadyAiModeled, DollarAtString extensibleType
5962
where
6063
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
6164
u.appliesToEndpoint(endpoint)
6265
) and
6366
endpoint =
64-
getSampleForSignature(9, package, type, subtypes, name, signature, input, isVarargsArray) and
65-
// If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
66-
// don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
67-
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
68-
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
69-
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
67+
getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
68+
isVarargsArray, extensibleType) and
69+
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
70+
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
71+
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
72+
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
73+
// types, and we don't need to reexamine it.
7074
(
71-
not CharacteristicsImpl::isSink(endpoint, _, _) and alreadyAiModeled = ""
75+
not CharacteristicsImpl::isModeled(endpoint, _, _, _) and alreadyAiModeled = ""
7276
or
7377
alreadyAiModeled.matches("%ai-%") and
74-
CharacteristicsImpl::isSink(endpoint, _, alreadyAiModeled)
78+
CharacteristicsImpl::isModeled(endpoint, _, _, alreadyAiModeled)
7579
) and
76-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and
80+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
7781
includeAutomodelCandidate(package, type, name, signature) and
7882
// The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be
7983
// a non-sink, and we surface only endpoints that have at least one such sink type.
8084
message =
8185
strictconcat(AutomodelEndpointTypes::SinkType sinkType |
82-
not CharacteristicsImpl::isKnownSink(endpoint, sinkType, _) and
86+
not CharacteristicsImpl::isKnownAs(endpoint, sinkType, _) and
8387
CharacteristicsImpl::isSinkCandidate(endpoint, sinkType)
8488
|
8589
sinkType, ", "
8690
)
8791
select endpoint.asNode(),
88-
message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@.", //
92+
message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
8993
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
9094
package, "package", //
9195
type, "type", //
9296
subtypes, "subtypes", //
9397
name, "name", // method name
9498
signature, "signature", //
9599
input, "input", //
100+
output, "output", //
96101
isVarargsArray, "isVarargsArray", //
97-
alreadyAiModeled, "alreadyAiModeled"
102+
alreadyAiModeled, "alreadyAiModeled", //
103+
extensibleType, "extensibleType"

0 commit comments

Comments
 (0)