Skip to content

Commit 41726f5

Browse files
authored
Merge pull request github#13954 from github/kaeluka/add-provenance-to-metadata
Java: Automodel: Add Candidates for Regression Testing
2 parents ae4c76c + de76c07 commit 41726f5

10 files changed

+61
-32
lines changed

java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ private Endpoint getSampleForSignature(
5555
from
5656
Endpoint endpoint, string message, ApplicationModeMetadataExtractor meta, DollarAtString package,
5757
DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
58-
DollarAtString input, DollarAtString isVarargsArray
58+
DollarAtString input, DollarAtString isVarargsArray, DollarAtString alreadyAiModeled
5959
where
6060
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
6161
u.appliesToEndpoint(endpoint)
@@ -67,25 +67,31 @@ where
6767
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
6868
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
6969
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
70-
not CharacteristicsImpl::isSink(endpoint, _, _) and
70+
(
71+
not CharacteristicsImpl::isSink(endpoint, _, _) and alreadyAiModeled = ""
72+
or
73+
alreadyAiModeled.matches("%ai-%") and
74+
CharacteristicsImpl::isSink(endpoint, _, alreadyAiModeled)
75+
) and
7176
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and
7277
includeAutomodelCandidate(package, type, name, signature) and
7378
// The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be
7479
// a non-sink, and we surface only endpoints that have at least one such sink type.
7580
message =
7681
strictconcat(AutomodelEndpointTypes::SinkType sinkType |
77-
not CharacteristicsImpl::isKnownSink(endpoint, sinkType) and
82+
not CharacteristicsImpl::isKnownSink(endpoint, sinkType, _) and
7883
CharacteristicsImpl::isSinkCandidate(endpoint, sinkType)
7984
|
8085
sinkType, ", "
8186
)
8287
select endpoint.asNode(),
83-
message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@.", //
88+
message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@.", //
8489
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
8590
package, "package", //
8691
type, "type", //
8792
subtypes, "subtypes", //
8893
name, "name", // method name
8994
signature, "signature", //
9095
input, "input", //
91-
isVarargsArray, "isVarargsArray"
96+
isVarargsArray, "isVarargsArray", //
97+
alreadyAiModeled, "alreadyAiModeled"

java/ql/src/Telemetry/AutomodelApplicationModeExtractPositiveExamples.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ where
2222
not erroneousEndpoints(endpoint, _, _, _, _, false) and
2323
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and
2424
// Extract positive examples of sinks belonging to the existing ATM query configurations.
25-
CharacteristicsImpl::isKnownSink(endpoint, sinkType) and
25+
CharacteristicsImpl::isKnownSink(endpoint, sinkType, _) and
2626
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
2727
select endpoint.asNode(),
2828
sinkType + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@.", //

java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ private import AutomodelJavaUtil
1818
from
1919
Endpoint endpoint, string message, FrameworkModeMetadataExtractor meta, DollarAtString package,
2020
DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
21-
DollarAtString input, DollarAtString parameterName
21+
DollarAtString input, DollarAtString parameterName, DollarAtString alreadyAiModeled
2222
where
2323
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
2424
u.appliesToEndpoint(endpoint)
@@ -28,20 +28,25 @@ where
2828
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
2929
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
3030
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
31-
not CharacteristicsImpl::isSink(endpoint, _, _) and
31+
(
32+
not CharacteristicsImpl::isSink(endpoint, _, _) and alreadyAiModeled = ""
33+
or
34+
alreadyAiModeled.matches("%ai-%") and
35+
CharacteristicsImpl::isSink(endpoint, _, alreadyAiModeled)
36+
) and
3237
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, parameterName) and
3338
includeAutomodelCandidate(package, type, name, signature) and
3439
// The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be
3540
// a non-sink, and we surface only endpoints that have at least one such sink type.
3641
message =
3742
strictconcat(AutomodelEndpointTypes::SinkType sinkType |
38-
not CharacteristicsImpl::isKnownSink(endpoint, sinkType) and
43+
not CharacteristicsImpl::isKnownSink(endpoint, sinkType, _) and
3944
CharacteristicsImpl::isSinkCandidate(endpoint, sinkType)
4045
|
4146
sinkType, ", "
4247
)
4348
select endpoint,
44-
message + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@.", //
49+
message + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@.", //
4550
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
4651
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
4752
package, "package", //
@@ -50,4 +55,5 @@ select endpoint,
5055
name, "name", //
5156
signature, "signature", //
5257
input, "input", //
53-
parameterName, "parameterName" //
58+
parameterName, "parameterName", //
59+
alreadyAiModeled, "alreadyAiModeled"

java/ql/src/Telemetry/AutomodelFrameworkModeExtractPositiveExamples.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ where
2222
not erroneousEndpoints(endpoint, _, _, _, _, false) and
2323
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, parameterName) and
2424
// Extract positive examples of sinks belonging to the existing ATM query configurations.
25-
CharacteristicsImpl::isKnownSink(endpoint, sinkType)
25+
CharacteristicsImpl::isKnownSink(endpoint, sinkType, _)
2626
select endpoint,
2727
sinkType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@.", //
2828
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //

java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,14 +94,15 @@ module SharedCharacteristics<CandidateSig Candidate> {
9494
/**
9595
* Holds if `sink` is a known sink of type `endpointType`, i.e. `characteristic` applies to `sink` and implies `endpointType` with maximal confidence.
9696
*/
97-
predicate isKnownSink(Candidate::Endpoint sink, Candidate::EndpointType endpointType) {
97+
predicate isKnownSink(
98+
Candidate::Endpoint sink, Candidate::EndpointType endpointType,
99+
EndpointCharacteristic characteristic
100+
) {
98101
// If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a
99102
// known sink for the class.
100103
not endpointType instanceof Candidate::NegativeEndpointType and
101-
exists(EndpointCharacteristic characteristic |
102-
characteristic.appliesToEndpoint(sink) and
103-
characteristic.hasImplications(endpointType, true, maximalConfidence())
104-
)
104+
characteristic.appliesToEndpoint(sink) and
105+
characteristic.hasImplications(endpointType, true, maximalConfidence())
105106
}
106107

107108
/**
@@ -275,15 +276,17 @@ module SharedCharacteristics<CandidateSig Candidate> {
275276
private class KnownSinkCharacteristic extends SinkCharacteristic {
276277
string madKind;
277278
Candidate::EndpointType endpointType;
279+
string provenance;
278280

279281
KnownSinkCharacteristic() {
280282
Candidate::isKnownKind(madKind, endpointType) and
281283
// bind "this" to a unique string differing from that of the SinkType classes
282-
this = madKind + "-characteristic"
284+
this = madKind + "_" + provenance + "_characteristic" and
285+
Candidate::isSink(_, madKind, provenance)
283286
}
284287

285288
override predicate appliesToEndpoint(Candidate::Endpoint e) {
286-
Candidate::isSink(e, madKind, _)
289+
Candidate::isSink(e, madKind, provenance)
287290
}
288291

289292
override Candidate::EndpointType getSinkType() { result = endpointType }
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
| Test.java:16:3:16:11 | reference | command-injection, path-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@. | Test.java:16:3:16:24 | set(...) | CallContext | file://java.util.concurrent.atomic:1:1:1:1 | java.util.concurrent.atomic | package | file://AtomicReference:1:1:1:1 | AtomicReference | type | file://false:1:1:1:1 | false | subtypes | file://set:1:1:1:1 | set | name | file://(String):1:1:1:1 | (String) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://false:1:1:1:1 | false | isVarargsArray |
2-
| Test.java:21:3:21:10 | supplier | command-injection, path-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@. | Test.java:21:3:21:16 | get(...) | CallContext | file://java.util.function:1:1:1:1 | java.util.function | package | file://Supplier:1:1:1:1 | Supplier | type | file://true:1:1:1:1 | true | subtypes | file://get:1:1:1:1 | get | name | file://():1:1:1:1 | () | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://false:1:1:1:1 | false | isVarargsArray |
3-
| Test.java:53:4:53:4 | o | command-injection, path-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@. | Test.java:51:3:56:3 | walk(...) | CallContext | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://walk:1:1:1:1 | walk | name | file://(Path,FileVisitOption[]):1:1:1:1 | (Path,FileVisitOption[]) | signature | file://Argument[1]:1:1:1:1 | Argument[1] | input | file://true:1:1:1:1 | true | isVarargsArray |
1+
| Test.java:16:3:16:11 | reference | command-injection, path-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:16:3:16:24 | set(...) | CallContext | file://java.util.concurrent.atomic:1:1:1:1 | java.util.concurrent.atomic | package | file://AtomicReference:1:1:1:1 | AtomicReference | type | file://false:1:1:1:1 | false | subtypes | file://set:1:1:1:1 | set | name | file://(String):1:1:1:1 | (String) | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled |
2+
| Test.java:21:3:21:10 | supplier | command-injection, path-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:21:3:21:16 | get(...) | CallContext | file://java.util.function:1:1:1:1 | java.util.function | package | file://Supplier:1:1:1:1 | Supplier | type | file://true:1:1:1:1 | true | subtypes | file://get:1:1:1:1 | get | name | file://():1:1:1:1 | () | signature | file://Argument[this]:1:1:1:1 | Argument[this] | input | file://false:1:1:1:1 | false | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled |
3+
| Test.java:34:4:34:11 | openPath | command-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:33:10:35:3 | newInputStream(...) | CallContext | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://newInputStream:1:1:1:1 | newInputStream | name | file://(Path,OpenOption[]):1:1:1:1 | (Path,OpenOption[]) | signature | file://Argument[0]:1:1:1:1 | Argument[0] | input | file://false:1:1:1:1 | false | isVarargsArray | file://ai-manual:1:1:1:1 | ai-manual | alreadyAiModeled |
4+
| Test.java:53:4:53:4 | o | command-injection, path-injection, request-forgery, sql-injection\nrelated locations: $@.\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@. | Test.java:51:3:56:3 | walk(...) | CallContext | file://java.nio.file:1:1:1:1 | java.nio.file | package | file://Files:1:1:1:1 | Files | type | file://false:1:1:1:1 | false | subtypes | file://walk:1:1:1:1 | walk | name | file://(Path,FileVisitOption[]):1:1:1:1 | (Path,FileVisitOption[]) | signature | file://Argument[1]:1:1:1:1 | Argument[1] | input | file://true:1:1:1:1 | true | isVarargsArray | file://:1:1:1:1 | | alreadyAiModeled |

java/ql/test/query-tests/Telemetry/AutomodelApplicationModeExtraction/Test.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public static void copyFiles(Path source, Path target, CopyOption option) throws
3131

3232
public static InputStream getInputStream(Path openPath) throws Exception {
3333
return Files.newInputStream(
34-
openPath // positive example (known sink)
34+
openPath // positive example (known sink), candidate ("only" ai-modeled, and useful as a candidate in regression testing)
3535
);
3636
}
3737

@@ -56,4 +56,3 @@ public static void FilesWalkExample(Path p, FileVisitOption o) throws Exception
5656
);
5757
}
5858
}
59-

0 commit comments

Comments
 (0)