Skip to content

Commit 6c47a5d

Browse files
author
Max Schaefer
committed
Refactor framework-mode queries to make them more easily testable.
1 parent adea805 commit 6c47a5d

5 files changed

+105
-63
lines changed

java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -372,29 +372,28 @@ class ApplicationModeMetadataExtractor extends string {
372372
}
373373

374374
/**
375-
* Holds if the given `endpoint` should be considered a candidate for the `extensibleType.
375+
* Holds if the given `endpoint` should be considered a candidate for the `extensibleType`.
376376
*
377377
* The other parameters record various other properties of interest.
378378
*/
379379
predicate isCandidate(
380380
Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
381381
string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
382382
) {
383-
exists(ApplicationModeMetadataExtractor meta |
384-
CharacteristicsImpl::isCandidate(endpoint, _) and
385-
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
386-
u.appliesToEndpoint(endpoint)
387-
) and
388-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargs,
389-
alreadyAiModeled, extensibleType) and
390-
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
391-
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
392-
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
393-
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
394-
// types, and we don't need to reexamine it.
395-
alreadyAiModeled.matches(["", "%ai-%"]) and
396-
AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
397-
)
383+
CharacteristicsImpl::isCandidate(endpoint, _) and
384+
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
385+
u.appliesToEndpoint(endpoint)
386+
) and
387+
any(ApplicationModeMetadataExtractor meta)
388+
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargs,
389+
alreadyAiModeled, extensibleType) and
390+
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
391+
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
392+
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
393+
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
394+
// types, and we don't need to reexamine it.
395+
alreadyAiModeled.matches(["", "%ai-%"]) and
396+
AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
398397
}
399398

400399
/**

java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,85 @@ class FrameworkModeMetadataExtractor extends string {
312312
}
313313
}
314314

315+
/**
316+
* Holds if the given `endpoint` should be considered a candidate for the `extensibleType`.
317+
*
318+
* The other parameters record various other properties of interest.
319+
*/
320+
predicate isCandidate(
321+
Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
322+
string input, string output, string parameterName, string extensibleType, string alreadyAiModeled
323+
) {
324+
CharacteristicsImpl::isCandidate(endpoint, _) and
325+
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
326+
u.appliesToEndpoint(endpoint)
327+
) and
328+
any(FrameworkModeMetadataExtractor meta)
329+
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
330+
alreadyAiModeled, extensibleType) and
331+
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
332+
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
333+
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
334+
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
335+
// types, and we don't need to reexamine it.
336+
alreadyAiModeled.matches(["", "%ai-%"]) and
337+
AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
338+
}
339+
340+
/**
341+
* Holds if the given `endpoint` is a negative example for the `extensibleType`
342+
* because of the `characteristic`.
343+
*
344+
* The other parameters record various other properties of interest.
345+
*/
346+
predicate isNegativeExample(
347+
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
348+
string type, string subtypes, string name, string signature, string input, string output,
349+
string parameterName, string extensibleType
350+
) {
351+
characteristic.appliesToEndpoint(endpoint) and
352+
// the node is known not to be an endpoint of any appropriate type
353+
forall(AutomodelEndpointTypes::EndpointType tp |
354+
tp = CharacteristicsImpl::getAPotentialType(endpoint)
355+
|
356+
characteristic.hasImplications(tp, false, _)
357+
) and
358+
// the lowest confidence across all endpoint types should be at least highConfidence
359+
confidence =
360+
min(float c |
361+
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
362+
) and
363+
confidence >= SharedCharacteristics::highConfidence() and
364+
any(FrameworkModeMetadataExtractor meta)
365+
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
366+
_, extensibleType) and
367+
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
368+
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
369+
not exists(EndpointCharacteristic characteristic2, float confidence2 |
370+
characteristic2 != characteristic
371+
|
372+
characteristic2.appliesToEndpoint(endpoint) and
373+
confidence2 >= SharedCharacteristics::maximalConfidence() and
374+
characteristic2
375+
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
376+
)
377+
}
378+
379+
/**
380+
* Holds if the given `endpoint` is a positive example for the `endpointType`.
381+
*
382+
* The other parameters record various other properties of interest.
383+
*/
384+
predicate isPositiveExample(
385+
Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
386+
string signature, string input, string output, string parameterName, string extensibleType
387+
) {
388+
any(FrameworkModeMetadataExtractor meta)
389+
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
390+
_, extensibleType) and
391+
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _)
392+
}
393+
315394
/*
316395
* EndpointCharacteristic classes that are specific to Automodel for Java.
317396
*/

java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,24 +16,12 @@ private import AutomodelFrameworkModeCharacteristics
1616
private import AutomodelJavaUtil
1717

1818
from
19-
Endpoint endpoint, FrameworkModeMetadataExtractor meta, DollarAtString package,
19+
Endpoint endpoint, DollarAtString package,
2020
DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
2121
DollarAtString input, DollarAtString output, DollarAtString parameterName,
2222
DollarAtString alreadyAiModeled, DollarAtString extensibleType
2323
where
24-
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
25-
u.appliesToEndpoint(endpoint)
26-
) and
27-
CharacteristicsImpl::isCandidate(endpoint, _) and
28-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
29-
alreadyAiModeled, extensibleType) and
30-
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
31-
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
32-
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
33-
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
34-
// types, and we don't need to reexamine it.
35-
alreadyAiModeled.matches(["", "%ai-%"]) and
36-
includeAutomodelCandidate(package, type, name, signature)
24+
isCandidate(endpoint, package, type, subtypes, name, signature, input, output, parameterName, extensibleType, alreadyAiModeled)
3725
select endpoint,
3826
"Related locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
3927
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //

java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql

Lines changed: 7 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,37 +14,15 @@ private import AutomodelJavaUtil
1414

1515
from
1616
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence,
17-
DollarAtString message, FrameworkModeMetadataExtractor meta, DollarAtString package,
18-
DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
19-
DollarAtString input, DollarAtString output, DollarAtString parameterName,
20-
DollarAtString extensibleType
17+
DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
18+
DollarAtString signature, DollarAtString input, DollarAtString output,
19+
DollarAtString parameterName, DollarAtString extensibleType
2120
where
22-
characteristic.appliesToEndpoint(endpoint) and
23-
// the node is known not to be an endpoint of any appropriate type
24-
forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) |
25-
characteristic.hasImplications(tp, false, _)
26-
) and
27-
// the lowest confidence across all endpoint types should be at least highConfidence
28-
confidence =
29-
min(float c |
30-
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
31-
) and
32-
confidence >= SharedCharacteristics::highConfidence() and
33-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
34-
_, extensibleType) and
35-
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
36-
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
37-
not exists(EndpointCharacteristic characteristic2, float confidence2 |
38-
characteristic2 != characteristic
39-
|
40-
characteristic2.appliesToEndpoint(endpoint) and
41-
confidence2 >= SharedCharacteristics::maximalConfidence() and
42-
characteristic2
43-
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
44-
) and
45-
message = characteristic
21+
isNegativeExample(endpoint, characteristic, confidence, package, type, subtypes, name, signature,
22+
input, output, parameterName, extensibleType)
4623
select endpoint,
47-
message + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
24+
characteristic + "\nrelated locations: $@, $@." +
25+
"\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
4826
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
4927
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
5028
package, "package", //

java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,12 @@ private import AutomodelEndpointTypes
1313
private import AutomodelJavaUtil
1414

1515
from
16-
Endpoint endpoint, EndpointType endpointType, FrameworkModeMetadataExtractor meta,
16+
Endpoint endpoint, EndpointType endpointType,
1717
DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
1818
DollarAtString signature, DollarAtString input, DollarAtString output,
1919
DollarAtString parameterName, DollarAtString extensibleType
2020
where
21-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
22-
_, extensibleType) and
23-
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _)
21+
isPositiveExample(endpoint, endpointType, package, type, subtypes, name, signature, input, output, parameterName, extensibleType)
2422
select endpoint,
2523
endpointType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
2624
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //

0 commit comments

Comments
 (0)