Skip to content

Commit bb63fcd

Browse files
author
Max Schaefer
committed
Refactor to avoid bad join order.
1 parent 45ca301 commit bb63fcd

9 files changed

+74
-62
lines changed

java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -356,10 +356,10 @@ class ApplicationModeMetadataExtractor extends string {
356356

357357
predicate hasMetadata(
358358
Endpoint e, string package, string type, string subtypes, string name, string signature,
359-
string input, string output, string isVarargsArray
359+
string input, string output, string isVarargsArray, string alreadyAiModeled,
360+
string extensibleType
360361
) {
361-
exists(Callable callable |
362-
e.getCallable() = callable and
362+
exists(Callable callable | e.getCallable() = callable |
363363
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
364364
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
365365
package = callable.getDeclaringType().getPackage().getName() and
@@ -369,9 +369,17 @@ class ApplicationModeMetadataExtractor extends string {
369369
subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
370370
name = callable.getName() and
371371
signature = ExternalFlow::paramsString(callable) and
372-
if e instanceof ImplicitVarargsArray
373-
then isVarargsArray = "true"
374-
else isVarargsArray = "false"
372+
(
373+
if e instanceof ImplicitVarargsArray
374+
then isVarargsArray = "true"
375+
else isVarargsArray = "false"
376+
) and
377+
extensibleType = e.getExtensibleType()
378+
) and
379+
(
380+
not CharacteristicsImpl::isModeled(e, _, _, _) and alreadyAiModeled = ""
381+
or
382+
CharacteristicsImpl::isModeled(e, _, _, alreadyAiModeled)
375383
)
376384
}
377385
}
@@ -416,7 +424,8 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::Neither
416424
* boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
417425
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
418426
*/
419-
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
427+
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
428+
{
420429
UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }
421430

422431
override predicate appliesToEndpoint(Endpoint e) {
@@ -439,7 +448,8 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei
439448
* A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks,
440449
* and its return value should not be considered a source.
441450
*/
442-
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
451+
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
452+
{
443453
ExceptionCharacteristic() { this = "exception" }
444454

445455
override predicate appliesToEndpoint(Endpoint e) {

java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,20 @@ private import AutomodelJavaUtil
2525
bindingset[limit]
2626
private Endpoint getSampleForSignature(
2727
int limit, string package, string type, string subtypes, string name, string signature,
28-
string input, string output, string isVarargs, string extensibleType
28+
string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
2929
) {
3030
exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta |
3131
num_endpoints =
3232
count(Endpoint e |
33-
e.getExtensibleType() = extensibleType and
34-
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
33+
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
34+
alreadyAiModeled, extensibleType)
3535
)
3636
|
3737
result =
3838
rank[n](Endpoint e, Location loc |
3939
loc = e.asTop().getLocation() and
40-
e.getExtensibleType() = extensibleType and
41-
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
40+
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
41+
alreadyAiModeled, extensibleType)
4242
|
4343
e
4444
order by
@@ -66,19 +66,15 @@ where
6666
CharacteristicsImpl::isCandidate(endpoint, _) and
6767
endpoint =
6868
getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
69-
isVarargsArray, extensibleType) and
69+
isVarargsArray, extensibleType, alreadyAiModeled) and
70+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
71+
isVarargsArray, alreadyAiModeled, extensibleType) and
7072
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
7173
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
7274
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
7375
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
7476
// types, and we don't need to reexamine it.
75-
(
76-
not CharacteristicsImpl::isModeled(endpoint, _, _, _) and alreadyAiModeled = ""
77-
or
78-
alreadyAiModeled.matches("%ai-%") and
79-
CharacteristicsImpl::isModeled(endpoint, _, _, alreadyAiModeled)
80-
) and
81-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
77+
alreadyAiModeled.matches(["", "%ai-%"]) and
8278
includeAutomodelCandidate(package, type, name, signature)
8379
select endpoint.asNode(),
8480
"Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //

java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,15 @@ from
4747
DollarAtString output, DollarAtString isVarargsArray, DollarAtString extensibleType
4848
where
4949
endpoint = getSampleForCharacteristic(characteristic, 100) and
50-
extensibleType = endpoint.getExtensibleType() and
5150
// the node is know not to be an endpoint of any appropriate type
5251
forall(EndpointType tp | tp = endpoint.getAPotentialType() |
5352
characteristic.hasImplications(tp, false, _)
5453
) and
5554
// the lowest confidence across all endpoint types should be at least highConfidence
5655
confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and
5756
confidence >= SharedCharacteristics::highConfidence() and
58-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
57+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
58+
isVarargsArray, _, extensibleType) and
5959
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
6060
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here.
6161
not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 |

java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@ from
1818
DollarAtString signature, DollarAtString input, DollarAtString output,
1919
DollarAtString isVarargsArray, DollarAtString extensibleType
2020
where
21-
extensibleType = endpoint.getExtensibleType() and
22-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
23-
// Extract positive examples of sinks belonging to the existing ATM query configurations.
21+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
22+
isVarargsArray, _, extensibleType) and
2423
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
2524
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
2625
select endpoint.asNode(),

java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -305,16 +305,27 @@ class FrameworkModeMetadataExtractor extends string {
305305

306306
predicate hasMetadata(
307307
Endpoint e, string package, string type, string subtypes, string name, string signature,
308-
string input, string output, string parameterName
308+
string input, string output, string parameterName, string alreadyAiModeled,
309+
string extensibleType
309310
) {
310-
(if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
311-
name = e.getCallable().getName() and
312-
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
313-
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
314-
package = e.getCallable().getDeclaringType().getPackage().getName() and
315-
type = e.getCallable().getDeclaringType().getErasure().(RefType).nestedName() and
316-
subtypes = AutomodelJavaUtil::considerSubtypes(e.getCallable()).toString() and
317-
signature = ExternalFlow::paramsString(e.getCallable())
311+
exists(Callable callable | e.getCallable() = callable |
312+
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
313+
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
314+
package = callable.getDeclaringType().getPackage().getName() and
315+
// we're using the erased types because the MaD convention is to not specify type parameters.
316+
// Whether something is or isn't a sink doesn't usually depend on the type parameters.
317+
type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
318+
subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
319+
name = callable.getName() and
320+
signature = ExternalFlow::paramsString(callable) and
321+
(if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
322+
e.getExtensibleType() = extensibleType
323+
) and
324+
(
325+
not CharacteristicsImpl::isModeled(e, _, _, _) and alreadyAiModeled = ""
326+
or
327+
CharacteristicsImpl::isModeled(e, _, _, alreadyAiModeled)
328+
)
318329
}
319330
}
320331

@@ -332,7 +343,8 @@ class FrameworkModeMetadataExtractor extends string {
332343
*
333344
* TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
334345
*/
335-
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
346+
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
347+
{
336348
UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }
337349

338350
override predicate appliesToEndpoint(Endpoint e) {
@@ -357,7 +369,8 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::Neither
357369
* boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
358370
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
359371
*/
360-
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
372+
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
373+
{
361374
UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }
362375

363376
override predicate appliesToEndpoint(Endpoint e) {
@@ -380,7 +393,8 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Nei
380393
* A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks,
381394
* and its return value should not be considered a source.
382395
*/
383-
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic {
396+
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
397+
{
384398
ExceptionCharacteristic() { this = "exception" }
385399

386400
override predicate appliesToEndpoint(Endpoint e) {
@@ -396,7 +410,6 @@ private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSource
396410
}
397411
}
398412

399-
400413
/**
401414
* A characteristic that limits candidates to parameters of methods that are recognized as `ModelApi`, iow., APIs that
402415
* are considered worth modeling.

java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,18 @@ from
2121
DollarAtString input, DollarAtString output, DollarAtString parameterName,
2222
DollarAtString alreadyAiModeled, DollarAtString extensibleType
2323
where
24-
endpoint.getExtensibleType() = extensibleType and
2524
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
2625
u.appliesToEndpoint(endpoint)
2726
) and
2827
CharacteristicsImpl::isCandidate(endpoint, _) and
29-
// If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
30-
// don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
31-
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
32-
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
33-
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
34-
(
35-
not CharacteristicsImpl::isSink(endpoint, _, _) and alreadyAiModeled = ""
36-
or
37-
alreadyAiModeled.matches("%ai-%") and
38-
CharacteristicsImpl::isSink(endpoint, _, alreadyAiModeled)
39-
) and
40-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
28+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
29+
alreadyAiModeled, extensibleType) and
30+
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
31+
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
32+
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
33+
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
34+
// types, and we don't need to reexamine it.
35+
alreadyAiModeled.matches(["", "%ai-%"]) and
4136
includeAutomodelCandidate(package, type, name, signature)
4237
select endpoint,
4338
"Related locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //

java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ from
1919
DollarAtString input, DollarAtString output, DollarAtString parameterName,
2020
DollarAtString extensibleType
2121
where
22-
endpoint.getExtensibleType() = extensibleType and
2322
characteristic.appliesToEndpoint(endpoint) and
2423
// the node is known not to be an endpoint of any appropriate type
2524
forall(EndpointType tp | tp = endpoint.getAPotentialType() |
@@ -28,7 +27,8 @@ where
2827
// the lowest confidence across all endpoint types should be at least highConfidence
2928
confidence = min(float c | characteristic.hasImplications(endpoint.getAPotentialType(), false, c)) and
3029
confidence >= SharedCharacteristics::highConfidence() and
31-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
30+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
31+
_, extensibleType) and
3232
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
3333
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly them here.
3434
not exists(EndpointCharacteristic characteristic2, float confidence2, EndpointType type2 |

java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@ from
1818
DollarAtString signature, DollarAtString input, DollarAtString output,
1919
DollarAtString parameterName, DollarAtString extensibleType
2020
where
21-
endpoint.getExtensibleType() = extensibleType and
22-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName) and
23-
// Extract positive examples of sinks belonging to the existing ATM query configurations.
21+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
22+
_, extensibleType) and
2423
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _)
2524
select endpoint,
2625
endpointType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //

java/ql/automodel/src/AutomodelSharedCharacteristics.qll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -272,16 +272,17 @@ module SharedCharacteristics<CandidateSig Candidate> {
272272
/**
273273
* A high-confidence characteristic that indicates that an endpoint is neither a source nor a sink of any type.
274274
*/
275-
abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic, NotASourceCharacteristic {
275+
abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic,
276+
NotASourceCharacteristic
277+
{
276278
bindingset[this]
277279
NeitherSourceNorSinkCharacteristic() { any() }
278280

279281
final override predicate hasImplications(
280282
Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
281283
) {
282284
NotASinkCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence) or
283-
NotASourceCharacteristic.super
284-
.hasImplications(endpointType, isPositiveIndicator, confidence)
285+
NotASourceCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence)
285286
}
286287
}
287288

@@ -373,8 +374,7 @@ module SharedCharacteristics<CandidateSig Candidate> {
373374
/**
374375
* A negative characteristic that indicates that an endpoint was manually modeled as a neutral model.
375376
*/
376-
private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic
377-
{
377+
private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic {
378378
NeutralModelCharacteristic() { this = "known non-endpoint" }
379379

380380
override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) }

0 commit comments

Comments
 (0)