Skip to content

Commit 5c43a0b

Browse files
authored
Merge pull request #15356 from github/max-schaefer/automodel-void-source-candidates
Automodel: Switch tests to inline expectations
2 parents 0a8869c + 99c9914 commit 5c43a0b

File tree

34 files changed

+440
-260
lines changed

34 files changed

+440
-260
lines changed

java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll

Lines changed: 92 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -239,13 +239,12 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
239239
// Sanitizers are currently not modeled in MaD. TODO: check if this has large negative impact.
240240
predicate isSanitizer(Endpoint e, EndpointType t) {
241241
exists(t) and
242-
(
243-
e.asNode().getType() instanceof BoxedType
244-
or
245-
e.asNode().getType() instanceof PrimitiveType
246-
or
247-
e.asNode().getType() instanceof NumberType
248-
)
242+
AutomodelJavaUtil::isUnexploitableType([
243+
// for most endpoints, we can get the type from the node
244+
e.asNode().getType(),
245+
// but not for calls to void methods, where we need to go via the AST
246+
e.asTop().(Expr).getType()
247+
])
249248
or
250249
t instanceof AutomodelEndpointTypes::PathInjectionSinkType and
251250
e.asNode() instanceof PathSanitizer::PathInjectionSanitizer
@@ -372,62 +371,124 @@ class ApplicationModeMetadataExtractor extends string {
372371
}
373372
}
374373

374+
/**
375+
* Holds if the given `endpoint` should be considered a candidate for the `extensibleType`.
376+
*
377+
* The other parameters record various other properties of interest.
378+
*/
379+
predicate isCandidate(
380+
Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
381+
string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
382+
) {
383+
CharacteristicsImpl::isCandidate(endpoint, _) and
384+
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
385+
u.appliesToEndpoint(endpoint)
386+
) and
387+
any(ApplicationModeMetadataExtractor meta)
388+
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargs,
389+
alreadyAiModeled, extensibleType) and
390+
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
391+
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
392+
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
393+
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
394+
// types, and we don't need to reexamine it.
395+
alreadyAiModeled.matches(["", "%ai-%"]) and
396+
AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
397+
}
398+
399+
/**
400+
* Holds if the given `endpoint` is a negative example for the `extensibleType`
401+
* because of the `characteristic`.
402+
*
403+
* The other parameters record various other properties of interest.
404+
*/
405+
predicate isNegativeExample(
406+
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
407+
string type, string subtypes, string name, string signature, string input, string output,
408+
string isVarargsArray, string extensibleType
409+
) {
410+
characteristic.appliesToEndpoint(endpoint) and
411+
// the node is known not to be an endpoint of any appropriate type
412+
forall(AutomodelEndpointTypes::EndpointType tp |
413+
tp = CharacteristicsImpl::getAPotentialType(endpoint)
414+
|
415+
characteristic.hasImplications(tp, false, _)
416+
) and
417+
// the lowest confidence across all endpoint types should be at least highConfidence
418+
confidence =
419+
min(float c |
420+
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
421+
) and
422+
confidence >= SharedCharacteristics::highConfidence() and
423+
any(ApplicationModeMetadataExtractor meta)
424+
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
425+
isVarargsArray, _, extensibleType) and
426+
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
427+
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
428+
not exists(EndpointCharacteristic characteristic2, float confidence2 |
429+
characteristic2 != characteristic
430+
|
431+
characteristic2.appliesToEndpoint(endpoint) and
432+
confidence2 >= SharedCharacteristics::maximalConfidence() and
433+
characteristic2
434+
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
435+
)
436+
}
437+
438+
/**
439+
* Holds if the given `endpoint` is a positive example for the `endpointType`.
440+
*
441+
* The other parameters record various other properties of interest.
442+
*/
443+
predicate isPositiveExample(
444+
Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
445+
string signature, string input, string output, string isVarargsArray, string extensibleType
446+
) {
447+
any(ApplicationModeMetadataExtractor meta)
448+
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
449+
isVarargsArray, _, extensibleType) and
450+
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
451+
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
452+
}
453+
375454
/*
376455
* EndpointCharacteristic classes that are specific to Automodel for Java.
377456
*/
378457

379458
/**
380-
* A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks,
381-
* and its return value should not be considered a source.
459+
* A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks.
382460
*
383461
* A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return
384462
* type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does
385463
* the dangerous/interesting thing, so we want the latter to be modeled as the sink.
386464
*
387465
* TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
388466
*/
389-
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
390-
{
467+
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
391468
UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }
392469

393470
override predicate appliesToEndpoint(Endpoint e) {
394471
e.getCallable().getName().matches("is%") and
395472
e.getCallable().getReturnType() instanceof BooleanType and
396-
(
397-
e.getExtensibleType() = "sinkModel" and
398-
not ApplicationCandidatesImpl::isSink(e, _, _)
399-
or
400-
e.getExtensibleType() = "sourceModel" and
401-
not ApplicationCandidatesImpl::isSource(e, _, _) and
402-
e.getMaDOutput() = "ReturnValue"
403-
)
473+
not ApplicationCandidatesImpl::isSink(e, _, _)
404474
}
405475
}
406476

407477
/**
408478
* A negative characteristic that indicates that parameters of an existence-checking boolean method should not be
409-
* considered sinks, and its return value should not be considered a source.
479+
* considered sinks.
410480
*
411481
* A sink is highly unlikely to be exploitable if its callable's name is `exists` or `notExists` and the callable has a
412482
* boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
413483
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
414484
*/
415-
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
416-
{
485+
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
417486
UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }
418487

419488
override predicate appliesToEndpoint(Endpoint e) {
420-
exists(Callable callable |
421-
callable = e.getCallable() and
489+
exists(Callable callable | callable = e.getCallable() |
422490
callable.getName().toLowerCase() = ["exists", "notexists"] and
423491
callable.getReturnType() instanceof BooleanType
424-
|
425-
e.getExtensibleType() = "sinkModel" and
426-
not ApplicationCandidatesImpl::isSink(e, _, _)
427-
or
428-
e.getExtensibleType() = "sourceModel" and
429-
not ApplicationCandidatesImpl::isSource(e, _, _) and
430-
e.getMaDOutput() = "ReturnValue"
431492
)
432493
}
433494
}

java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -55,27 +55,15 @@ private Endpoint getSampleForSignature(
5555
}
5656

5757
from
58-
Endpoint endpoint, ApplicationModeMetadataExtractor meta, DollarAtString package,
59-
DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
60-
DollarAtString input, DollarAtString output, DollarAtString isVarargsArray,
61-
DollarAtString alreadyAiModeled, DollarAtString extensibleType
58+
Endpoint endpoint, DollarAtString package, DollarAtString type, DollarAtString subtypes,
59+
DollarAtString name, DollarAtString signature, DollarAtString input, DollarAtString output,
60+
DollarAtString isVarargsArray, DollarAtString alreadyAiModeled, DollarAtString extensibleType
6261
where
63-
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
64-
u.appliesToEndpoint(endpoint)
65-
) and
66-
CharacteristicsImpl::isCandidate(endpoint, _) and
62+
isCandidate(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray,
63+
extensibleType, alreadyAiModeled) and
6764
endpoint =
6865
getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
69-
isVarargsArray, extensibleType, alreadyAiModeled) and
70-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
71-
isVarargsArray, alreadyAiModeled, extensibleType) and
72-
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
73-
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
74-
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
75-
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
76-
// types, and we don't need to reexamine it.
77-
alreadyAiModeled.matches(["", "%ai-%"]) and
78-
includeAutomodelCandidate(package, type, name, signature)
66+
isVarargsArray, extensibleType, alreadyAiModeled)
7967
select endpoint.asNode(),
8068
"Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
8169
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //

java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql

Lines changed: 2 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -40,45 +40,15 @@ Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) {
4040
)
4141
}
4242

43-
predicate candidate(
44-
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
45-
string type, string subtypes, string name, string signature, string input, string output,
46-
string isVarargsArray, string extensibleType
47-
) {
48-
// the node is known not to be an endpoint of any appropriate type
49-
forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) |
50-
characteristic.hasImplications(tp, false, _)
51-
) and
52-
// the lowest confidence across all endpoint types should be at least highConfidence
53-
confidence =
54-
min(float c |
55-
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
56-
) and
57-
confidence >= SharedCharacteristics::highConfidence() and
58-
any(ApplicationModeMetadataExtractor meta)
59-
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
60-
isVarargsArray, _, extensibleType) and
61-
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
62-
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
63-
not exists(EndpointCharacteristic characteristic2, float confidence2 |
64-
characteristic2 != characteristic
65-
|
66-
characteristic2.appliesToEndpoint(endpoint) and
67-
confidence2 >= SharedCharacteristics::maximalConfidence() and
68-
characteristic2
69-
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
70-
)
71-
}
72-
7343
from
7444
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
7545
DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
7646
DollarAtString signature, DollarAtString input, DollarAtString output,
7747
DollarAtString isVarargsArray, DollarAtString extensibleType
7848
where
7949
endpoint = getSampleForCharacteristic(characteristic, 100) and
80-
candidate(endpoint, characteristic, confidence, package, type, subtypes, name, signature, input,
81-
output, isVarargsArray, extensibleType) and
50+
isNegativeExample(endpoint, characteristic, confidence, package, type, subtypes, name, signature,
51+
input, output, isVarargsArray, extensibleType) and
8252
message = characteristic
8353
select endpoint.asNode(),
8454
message + "\nrelated locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //

java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,8 @@ from
1818
DollarAtString signature, DollarAtString input, DollarAtString output,
1919
DollarAtString isVarargsArray, DollarAtString extensibleType
2020
where
21-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
22-
isVarargsArray, _, extensibleType) and
23-
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
24-
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
21+
isPositiveExample(endpoint, endpointType, package, type, subtypes, name, signature, input, output,
22+
isVarargsArray, extensibleType)
2523
select endpoint.asNode(),
2624
endpointType + "\nrelated locations: $@, $@, $@." +
2725
"\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //

java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll

Lines changed: 84 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@ newtype JavaRelatedLocationType =
2525
newtype TFrameworkModeEndpoint =
2626
TExplicitParameter(Parameter p) {
2727
AutomodelJavaUtil::isFromSource(p) and
28-
not p.getType() instanceof PrimitiveType and
29-
not p.getType() instanceof BoxedType and
30-
not p.getType() instanceof NumberType
28+
not AutomodelJavaUtil::isUnexploitableType(p.getType())
3129
} or
3230
TQualifier(Callable c) { AutomodelJavaUtil::isFromSource(c) and not c instanceof Constructor } or
3331
TReturnValue(Callable c) {
@@ -36,25 +34,19 @@ newtype TFrameworkModeEndpoint =
3634
or
3735
AutomodelJavaUtil::isFromSource(c) and
3836
c instanceof Method and
39-
(
40-
not c.getReturnType() instanceof VoidType and
41-
not c.getReturnType() instanceof PrimitiveType
42-
)
37+
not AutomodelJavaUtil::isUnexploitableType(c.getReturnType())
4338
} or
4439
TOverridableParameter(Method m, Parameter p) {
4540
AutomodelJavaUtil::isFromSource(p) and
41+
not AutomodelJavaUtil::isUnexploitableType(p.getType()) and
4642
p.getCallable() = m and
4743
m instanceof ModelExclusions::ModelApi and
48-
not m.getDeclaringType().isFinal() and
49-
not m.isFinal() and
50-
not m.isStatic()
44+
AutomodelJavaUtil::isOverridable(m)
5145
} or
5246
TOverridableQualifier(Method m) {
5347
AutomodelJavaUtil::isFromSource(m) and
5448
m instanceof ModelExclusions::ModelApi and
55-
not m.getDeclaringType().isFinal() and
56-
not m.isFinal() and
57-
not m.isStatic()
49+
AutomodelJavaUtil::isOverridable(m)
5850
}
5951

6052
/**
@@ -317,6 +309,85 @@ class FrameworkModeMetadataExtractor extends string {
317309
}
318310
}
319311

312+
/**
313+
* Holds if the given `endpoint` should be considered a candidate for the `extensibleType`.
314+
*
315+
* The other parameters record various other properties of interest.
316+
*/
317+
predicate isCandidate(
318+
Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
319+
string input, string output, string parameterName, string extensibleType, string alreadyAiModeled
320+
) {
321+
CharacteristicsImpl::isCandidate(endpoint, _) and
322+
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
323+
u.appliesToEndpoint(endpoint)
324+
) and
325+
any(FrameworkModeMetadataExtractor meta)
326+
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
327+
alreadyAiModeled, extensibleType) and
328+
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
329+
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
330+
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
331+
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
332+
// types, and we don't need to reexamine it.
333+
alreadyAiModeled.matches(["", "%ai-%"]) and
334+
AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
335+
}
336+
337+
/**
338+
* Holds if the given `endpoint` is a negative example for the `extensibleType`
339+
* because of the `characteristic`.
340+
*
341+
* The other parameters record various other properties of interest.
342+
*/
343+
predicate isNegativeExample(
344+
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
345+
string type, string subtypes, string name, string signature, string input, string output,
346+
string parameterName, string extensibleType
347+
) {
348+
characteristic.appliesToEndpoint(endpoint) and
349+
// the node is known not to be an endpoint of any appropriate type
350+
forall(AutomodelEndpointTypes::EndpointType tp |
351+
tp = CharacteristicsImpl::getAPotentialType(endpoint)
352+
|
353+
characteristic.hasImplications(tp, false, _)
354+
) and
355+
// the lowest confidence across all endpoint types should be at least highConfidence
356+
confidence =
357+
min(float c |
358+
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
359+
) and
360+
confidence >= SharedCharacteristics::highConfidence() and
361+
any(FrameworkModeMetadataExtractor meta)
362+
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
363+
_, extensibleType) and
364+
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
365+
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
366+
not exists(EndpointCharacteristic characteristic2, float confidence2 |
367+
characteristic2 != characteristic
368+
|
369+
characteristic2.appliesToEndpoint(endpoint) and
370+
confidence2 >= SharedCharacteristics::maximalConfidence() and
371+
characteristic2
372+
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
373+
)
374+
}
375+
376+
/**
377+
* Holds if the given `endpoint` is a positive example for the `endpointType`.
378+
*
379+
* The other parameters record various other properties of interest.
380+
*/
381+
predicate isPositiveExample(
382+
Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
383+
string signature, string input, string output, string parameterName, string extensibleType
384+
) {
385+
any(FrameworkModeMetadataExtractor meta)
386+
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
387+
_, extensibleType) and
388+
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _)
389+
}
390+
320391
/*
321392
* EndpointCharacteristic classes that are specific to Automodel for Java.
322393
*/

0 commit comments

Comments
 (0)