Skip to content

Commit fcabca4

Browse files
author
Stephan Brandauer
committed
Java: Export MaD output in application mode extraction queries
1 parent 8d133f8 commit fcabca4

7 files changed

+57
-46
lines changed

java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,21 @@ abstract private class ApplicationModeEndpoint extends TApplicationModeEndpoint
4646

4747
abstract string getMaDInput();
4848

49+
abstract string getMaDOutput();
50+
4951
abstract Top asTop();
5052

5153
abstract DataFlow::Node asNode();
5254

53-
abstract string getExtensibleType();
55+
string getExtensibleType() {
56+
// XXX: the sourceModel endpoint (MethodCall) still implements a bogus getMaDInput() method, so the `not exists(this.getMaDInput())` check below has to stay commented out for now
57+
if /* not exists(this.getMaDInput()) and */ exists(this.getMaDOutput())
58+
then result = "sourceModel"
59+
else
60+
if exists(this.getMaDInput()) and not exists(this.getMaDOutput())
61+
then result = "sinkModel"
62+
else none() // reached when neither exists; once the check above is re-enabled, "both exist" (a summaryModel, not yet supported) will also end up here
63+
}
5464

5565
abstract string toString();
5666
}
@@ -68,16 +78,14 @@ class ExplicitArgument extends ApplicationModeEndpoint, TExplicitArgument {
6878

6979
private int getArgIndex() { this.asTop() = call.getArgument(result) }
7080

71-
override string getMaDInput() {
72-
result = "Argument[" + this.getArgIndex() + "]"
73-
}
81+
override string getMaDInput() { result = "Argument[" + this.getArgIndex() + "]" }
82+
83+
override string getMaDOutput() { none() }
7484

7585
override Top asTop() { result = arg.asExpr() }
7686

7787
override DataFlow::Node asNode() { result = arg }
7888

79-
override string getExtensibleType() { result = "sinkModel" }
80-
8189
override string toString() { result = arg.toString() }
8290
}
8391

@@ -91,12 +99,12 @@ class InstanceArgument extends ApplicationModeEndpoint, TInstanceArgument {
9199

92100
override string getMaDInput() { result = "Argument[this]" }
93101

102+
override string getMaDOutput() { none() }
103+
94104
override Top asTop() { if exists(arg.asExpr()) then result = arg.asExpr() else result = call }
95105

96106
override DataFlow::Node asNode() { result = arg }
97107

98-
override string getExtensibleType() { result = "sinkModel" }
99-
100108
override string toString() { result = arg.toString() }
101109
}
102110

@@ -118,16 +126,14 @@ class ImplicitVarargsArray extends ApplicationModeEndpoint, TImplicitVarargsArra
118126

119127
override Call getCall() { result = call }
120128

121-
override string getMaDInput() {
122-
result = "Argument[" + idx + "]"
123-
}
129+
override string getMaDInput() { result = "Argument[" + idx + "]" }
130+
131+
override string getMaDOutput() { none() }
124132

125133
override Top asTop() { result = this.getCall() }
126134

127135
override DataFlow::Node asNode() { result = vararg }
128136

129-
override string getExtensibleType() { result = "sinkModel" }
130-
131137
override string toString() { result = vararg.toString() }
132138
}
133139

@@ -143,12 +149,12 @@ class MethodCall extends ApplicationModeEndpoint, TMethodCall {
143149

144150
override string getMaDInput() { result = "Argument[this]" }
145151

152+
override string getMaDOutput() { result = "ReturnValue" }
153+
146154
override Top asTop() { result = call }
147155

148156
override DataFlow::Node asNode() { result.asExpr() = call }
149157

150-
override string getExtensibleType() { result = "sourceModel" }
151-
152158
override string toString() { result = call.toString() }
153159
}
154160

@@ -210,7 +216,6 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
210216
additional predicate sinkSpec(
211217
Endpoint e, string package, string type, string name, string signature, string ext, string input
212218
) {
213-
e.getExtensibleType() = "sinkModel" and
214219
ApplicationModeGetCallable::getCallable(e).hasQualifiedName(package, type, name) and
215220
signature = ExternalFlow::paramsString(ApplicationModeGetCallable::getCallable(e)) and
216221
ext = "" and
@@ -269,11 +274,12 @@ class ApplicationModeMetadataExtractor extends string {
269274

270275
predicate hasMetadata(
271276
Endpoint e, string package, string type, string subtypes, string name, string signature,
272-
string input, string isVarargsArray
277+
string input, string output, string isVarargsArray
273278
) {
274279
exists(Callable callable |
275280
e.getCall().getCallee() = callable and
276-
input = e.getMaDInput() and
281+
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
282+
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
277283
package = callable.getDeclaringType().getPackage().getName() and
278284
// we're using the erased types because the MaD convention is to not specify type parameters.
279285
// Whether something is or isn't a sink doesn't usually depend on the type parameters.

java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,20 @@ private import AutomodelJavaUtil
2525
bindingset[limit]
2626
private Endpoint getSampleForSignature(
2727
int limit, string package, string type, string subtypes, string name, string signature,
28-
string input, string isVarargs, string extensibleType
28+
string input, string output, string isVarargs, string extensibleType
2929
) {
3030
exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta |
3131
num_endpoints =
3232
count(Endpoint e |
3333
e.getExtensibleType() = extensibleType and
34-
meta.hasMetadata(e, package, type, subtypes, name, signature, input, isVarargs)
34+
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
3535
)
3636
|
3737
result =
3838
rank[n](Endpoint e, Location loc |
3939
loc = e.asTop().getLocation() and
4040
e.getExtensibleType() = extensibleType and
41-
meta.hasMetadata(e, package, type, subtypes, name, signature, input, isVarargs)
41+
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
4242
|
4343
e
4444
order by
@@ -57,13 +57,15 @@ private Endpoint getSampleForSignature(
5757
from
5858
Endpoint endpoint, string message, ApplicationModeMetadataExtractor meta, DollarAtString package,
5959
DollarAtString type, DollarAtString subtypes, DollarAtString name, DollarAtString signature,
60-
DollarAtString input, DollarAtString isVarargsArray, DollarAtString alreadyAiModeled, DollarAtString extensibleType
60+
DollarAtString input, DollarAtString output, DollarAtString isVarargsArray,
61+
DollarAtString alreadyAiModeled, DollarAtString extensibleType
6162
where
6263
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
6364
u.appliesToEndpoint(endpoint)
6465
) and
6566
endpoint =
66-
getSampleForSignature(9, package, type, subtypes, name, signature, input, isVarargsArray, extensibleType) and
67+
getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
68+
isVarargsArray, extensibleType) and
6769
// If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
6870
// don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
6971
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
@@ -75,7 +77,7 @@ where
7577
alreadyAiModeled.matches("%ai-%") and
7678
CharacteristicsImpl::isSink(endpoint, _, alreadyAiModeled)
7779
) and
78-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and
80+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
7981
includeAutomodelCandidate(package, type, name, signature) and
8082
// The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be
8183
// a non-sink, and we surface only endpoints that have at least one such sink type.
@@ -87,14 +89,15 @@ where
8789
sinkType, ", "
8890
)
8991
select endpoint.asNode(),
90-
message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
92+
message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
9193
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
9294
package, "package", //
9395
type, "type", //
9496
subtypes, "subtypes", //
9597
name, "name", // method name
9698
signature, "signature", //
9799
input, "input", //
100+
output, "output", //
98101
isVarargsArray, "isVarargsArray", //
99102
alreadyAiModeled, "alreadyAiModeled", //
100103
extensibleType, "extensibleType"

java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,15 @@ from
4444
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
4545
ApplicationModeMetadataExtractor meta, DollarAtString package, DollarAtString type,
4646
DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input,
47-
DollarAtString isVarargsArray
47+
DollarAtString output, DollarAtString isVarargsArray
4848
where
4949
endpoint = getSampleForCharacteristic(characteristic, 100) and
5050
confidence >= SharedCharacteristics::highConfidence() and
5151
characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and
5252
// Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
5353
// certain about in the prompt.
5454
not erroneousEndpoints(endpoint, _, _, _, _, false) and
55-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and
55+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
5656
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
5757
// treated by the actual query as a sanitizer, since the final logic is something like
5858
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because
@@ -65,12 +65,13 @@ where
6565
) and
6666
message = characteristic
6767
select endpoint.asNode(),
68-
message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@.", //
68+
message + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@.", //
6969
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
7070
package, "package", //
7171
type, "type", //
7272
subtypes, "subtypes", //
7373
name, "name", //
7474
signature, "signature", //
7575
input, "input", //
76+
output, "output", //
7677
isVarargsArray, "isVarargsArray" //

java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,23 @@ private import AutomodelJavaUtil
1515
from
1616
Endpoint endpoint, SinkType sinkType, ApplicationModeMetadataExtractor meta,
1717
DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
18-
DollarAtString signature, DollarAtString input, DollarAtString isVarargsArray
18+
DollarAtString signature, DollarAtString input, DollarAtString output, DollarAtString isVarargsArray
1919
where
2020
// Exclude endpoints that have contradictory endpoint characteristics, because we only want examples we're highly
2121
// certain about in the prompt.
2222
not erroneousEndpoints(endpoint, _, _, _, _, false) and
23-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, isVarargsArray) and
23+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
2424
// Extract positive examples of sinks belonging to the existing ATM query configurations.
2525
CharacteristicsImpl::isKnownSink(endpoint, sinkType, _) and
2626
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
2727
select endpoint.asNode(),
28-
sinkType + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@.", //
28+
sinkType + "\nrelated locations: $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@.", //
2929
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
3030
package, "package", //
3131
type, "type", //
3232
subtypes, "subtypes", //
3333
name, "name", //
3434
signature, "signature", //
3535
input, "input", //
36+
output, "output", //
3637
isVarargsArray, "isVarargsArray"

0 commit comments

Comments
 (0)